summaryrefslogtreecommitdiff
path: root/scripts
diff options
context:
space:
mode:
authorHans Hagen <pragma@wxs.nl>2012-12-24 20:08:00 +0100
committerHans Hagen <pragma@wxs.nl>2012-12-24 20:08:00 +0100
commit5f48570bba149ac17f45c80d5ee95306aa69d0c9 (patch)
treebfd405cdf69acd82e4352f142f3b0220abde6ed5 /scripts
parentd2d2f5a76a2323d1adafe3dc1926e95064b206d9 (diff)
downloadcontext-5f48570bba149ac17f45c80d5ee95306aa69d0c9.tar.gz
beta 2012.12.24 20:08
Diffstat (limited to 'scripts')
-rw-r--r--scripts/context/lua/mtx-context.lua70
-rw-r--r--scripts/context/lua/mtxrun.lua3736
-rw-r--r--scripts/context/stubs/mswin/mtxrun.lua3736
-rwxr-xr-xscripts/context/stubs/unix/mtxrun3736
4 files changed, 6835 insertions, 4443 deletions
diff --git a/scripts/context/lua/mtx-context.lua b/scripts/context/lua/mtx-context.lua
index 698a6f142..e4b5fb302 100644
--- a/scripts/context/lua/mtx-context.lua
+++ b/scripts/context/lua/mtx-context.lua
@@ -145,6 +145,30 @@ local report = application.report
scripts = scripts or { }
scripts.context = scripts.context or { }
+-- for the moment here
+
+local engine_new = getargument("engine") or directives.value("system.engine")
+local engine_old = environment.ownbin
+
+local function restart(engine_old,engine_new)
+ local command = format("%s --luaonly %q %s --redirected",engine_new,environment.ownname,environment.reconstructcommandline())
+ report(format("redirect %s -> %s: %s",engine_old,engine_new,command))
+ local result = os.execute(command)
+ os.exit(result)
+end
+
+if getargument("redirected") then
+ setargument("engine",engine_old) -- later on we need this
+elseif engine_new == engine_old then
+ setargument("engine",engine_new) -- later on we need this
+elseif environment.validengines[engine_new] and engine_new ~= environment.basicengines[engine_old] then
+ restart(engine_old,engine_new)
+else
+ setargument("engine",engine_new) -- later on we need this
+end
+
+-- so far
+
-- constants
local usedfiles = {
@@ -334,7 +358,12 @@ local function preamble_analyze(filename) -- only files on current path
multipass_nofruns = t.nofruns
end
if not t.engine then
- t.engine = 'luatex'
+ t.engine = environment.basicengines[engine_old] --'luatex'
+ end
+ if t.engine ~= engine_old then -- hack
+ if environment.validengines[t.engine] and t.engine ~= environment.basicengines[engine_old] then
+ restart(engine_old,t.engine)
+ end
end
end
return t
@@ -430,8 +459,9 @@ local function flags_to_string(flags,prefix) -- context flags get prepended by c
return concat(t," ")
end
-local function luatex_command(l_flags,c_flags,filename)
- return format('luatex %s %s "%s"',
+local function luatex_command(l_flags,c_flags,filename,engine)
+ return format('%s %s %s "%s"',
+ engine or "luatex",
flags_to_string(l_flags),
flags_to_string(c_flags,true),
filename
@@ -470,6 +500,7 @@ end
function scripts.context.run(ctxdata,filename)
--
local a_nofile = getargument("nofile")
+ local a_engine = getargument("engine")
--
local files = environment.files or { }
--
@@ -496,11 +527,11 @@ function scripts.context.run(ctxdata,filename)
--
local interface = validstring(getargument("interface")) or "en"
local formatname = formatofinterface[interface] or "cont-en"
- local formatfile, scriptfile = resolvers.locateformat(formatname)
+ local formatfile, scriptfile = resolvers.locateformat(formatname) -- regular engine !
if not formatfile or not scriptfile then
report("warning: no format found, forcing remake (commandline driven)")
scripts.context.make(formatname)
- formatfile, scriptfile = resolvers.locateformat(formatname)
+ formatfile, scriptfile = resolvers.locateformat(formatname) -- variant
end
if formatfile and scriptfile then
-- okay
@@ -526,6 +557,7 @@ function scripts.context.run(ctxdata,filename)
local a_backend = getargument("backend")
local a_arrange = getargument("arrange")
local a_noarrange = getargument("noarrange")
+ local a_jit = getargument("jit")
--
for i=1,#filelist do
--
@@ -550,11 +582,10 @@ function scripts.context.run(ctxdata,filename)
end
if not formatfile or not scriptfile then
report("warning: no format found, forcing remake (source driven)")
- scripts.context.make(formatname)
+ scripts.context.make(formatname,a_engine)
formatfile, scriptfile = resolvers.locateformat(formatname)
end
if formatfile and scriptfile then
- --
local suffix = validstring(getargument("suffix"))
local resultname = validstring(getargument("result"))
if suffix then
@@ -583,13 +614,17 @@ function scripts.context.run(ctxdata,filename)
pdf_close(resultname,pdfview)
end
end
- --
- local okay = statistics.checkfmtstatus(formatfile)
+ local okay = statistics.checkfmtstatus(formatfile,a_engine)
if okay ~= true then
report("warning: %s, forcing remake",tostring(okay))
scripts.context.make(formatname)
end
--
+-- if a_engine and a_engine ~= "" and a_engine ~= "luatex" then
+-- formatfile = gsub(formatfile,"/luatex%-cache/",format("/%s-cache/",a_engine))
+-- scriptfile = gsub(scriptfile,"/luatex%-cache/",format("/%s-cache/",a_engine))
+-- end
+ --
local oldhash = multipass_hashfiles(jobname)
local newhash = { }
local maxnofruns = once and 1 or multipass_nofruns
@@ -621,6 +656,7 @@ function scripts.context.run(ctxdata,filename)
["fmt"] = formatfile,
["lua"] = scriptfile,
["jobname"] = jobname,
+ ["jiton"] = a_jit and true or nil,
}
--
if a_synctex then
@@ -652,7 +688,7 @@ function scripts.context.run(ctxdata,filename)
c_flags.currentrun = currentrun
c_flags.noarrange = a_noarrange or a_arrange or nil
--
- local command = luatex_command(l_flags,c_flags,mainfile)
+ local command = luatex_command(l_flags,c_flags,mainfile,a_engine)
--
report("run %s: %s",i,command)
print("") -- cleaner, else continuation on same line
@@ -690,7 +726,7 @@ function scripts.context.run(ctxdata,filename)
c_flags.currentrun = c_flags.currentrun + 1
c_flags.noarrange = nil
--
- local command = luatex_command(l_flags,c_flags,mainfile)
+ local command = luatex_command(l_flags,c_flags,mainfile,a_engine)
--
report("arrange run: %s",command)
local returncode, errorstring = os.spawn(command)
@@ -808,7 +844,9 @@ function scripts.context.pipe() -- still used?
end
end
-local make_mkiv_format = environment.make_format
+local function make_mkiv_format(name,engine)
+ environment.make_format(name)
+end
local function make_mkii_format(name,engine)
local command = format("mtxrun texexec.rb --make --%s %s",name,engine)
@@ -833,8 +871,8 @@ function scripts.context.make(name)
name = formatofinterface[name] or name or ""
if name == "" then
-- nothing
- elseif engine == "luatex" then
- make_mkiv_format(name)
+ elseif engine == "luatex" or engine == "luajittex" then
+ make_mkiv_format(name,engine)
elseif engine == "pdftex" or engine == "xetex" then
make_mkii_format(name,engine)
end
@@ -1438,7 +1476,3 @@ elseif getargument("purgeall") then
else
application.help("basic")
end
-
-if getargument("profile") then
- os.setenv("MTX_PROFILE_RUN","NO")
-end
diff --git a/scripts/context/lua/mtxrun.lua b/scripts/context/lua/mtxrun.lua
index 01c601eb5..3a02ad582 100644
--- a/scripts/context/lua/mtxrun.lua
+++ b/scripts/context/lua/mtxrun.lua
@@ -1,5 +1,16 @@
#!/usr/bin/env texlua
+-- for k, v in next, _G.string do
+-- local tv = type(v)
+-- if tv == "table" then
+-- for kk, vv in next, v do
+-- print(k,kk,vv)
+-- end
+-- else
+-- print(tv,k,v)
+-- end
+-- end
+
if not modules then modules = { } end modules ['mtxrun'] = {
version = 1.001,
comment = "runner, lua replacement for texmfstart.rb",
@@ -43,7 +54,7 @@ if not modules then modules = { } end modules ['mtxrun'] = {
do -- create closure to overcome 200 locals limit
-if not modules then modules = { } end modules ['l-functions'] = {
+if not modules then modules = { } end modules ['l-lua'] = {
version = 1.001,
comment = "companion to luat-lib.mkiv",
author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
@@ -51,16 +62,112 @@ if not modules then modules = { } end modules ['l-functions'] = {
license = "see context related readme files"
}
-functions = functions or { }
+-- compatibility hacks ... try to avoid usage
-function functions.dummy() end
+local major, minor = string.match(_VERSION,"^[^%d]+(%d+)%.(%d+).*$")
+
+_MAJORVERSION = tonumber(major) or 5
+_MINORVERSION = tonumber(minor) or 1
+
+-- basics:
+
+if loadstring then
+
+ local loadnormal = load
+
+ function load(first,...)
+ if type(first) == "string" then
+ return loadstring(first,...)
+ else
+ return loadnormal(first,...)
+ end
+ end
+
+else
+
+ loadstring = load
+
+end
+
+-- table:
+
+-- Starting with version 5.2 Lua no longer provide ipairs, which makes
+-- sense. As we already used the for loop and # in most places the
+-- impact on ConTeXt was not that large; the remaining ipairs already
+-- have been replaced. In a similar fashion we also hardly used pairs.
+--
+-- Hm, actually ipairs was retained, but we no longer use it anyway.
+--
+-- Just in case, we provide the fallbacks as discussed in Programming
+-- in Lua (http://www.lua.org/pil/7.3.html):
+
+if not ipairs then
+
+ -- for k, v in ipairs(t) do ... end
+ -- for k=1,#t do local v = t[k] ... end
+
+ local function iterate(a,i)
+ i = i + 1
+ local v = a[i]
+ if v ~= nil then
+ return i, v --, nil
+ end
+ end
+
+ function ipairs(a)
+ return iterate, a, 0
+ end
+
+end
+
+if not pairs then
+
+ -- for k, v in pairs(t) do ... end
+ -- for k, v in next, t do ... end
+
+ function pairs(t)
+ return next, t -- , nil
+ end
+
+end
+
+-- The unpack function has been moved to the table table, and for compatiility
+-- reasons we provide both now.
+
+if not table.unpack then
+
+ table.unpack = _G.unpack
+
+elseif not unpack then
+
+ _G.unpack = table.unpack
+
+end
+
+-- package:
+
+-- if not package.seachers then
+--
+-- package.searchers = package.loaders -- 5.2
+--
+-- elseif not package.loaders then
+--
+-- package.loaders = package.searchers
+--
+-- end
+
+if not package.loaders then -- brr, searchers is a special "loadlib function" userdata type
+
+ package.loaders = package.searchers
+
+end
end -- of closure
do -- create closure to overcome 200 locals limit
-if not modules then modules = { } end modules ['l-string'] = {
+if not modules then modules = { } end modules ['l-lpeg'] = {
version = 1.001,
comment = "companion to luat-lib.mkiv",
author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
@@ -68,34 +175,843 @@ if not modules then modules = { } end modules ['l-string'] = {
license = "see context related readme files"
}
-local string = string
-local sub, gsub, find, match, gmatch, format, char, byte, rep, lower = string.sub, string.gsub, string.find, string.match, string.gmatch, string.format, string.char, string.byte, string.rep, string.lower
-local lpegmatch, S, C, Ct = lpeg.match, lpeg.S, lpeg.C, lpeg.Ct
+-- a new lpeg fails on a #(1-P(":")) test and really needs a + P(-1)
--- some functions may disappear as they are not used anywhere
+-- move utf -> l-unicode
+-- move string -> l-string or keep it here
-if not string.split then
+local lpeg = require("lpeg")
- -- this will be overloaded by a faster lpeg variant
+-- tracing (only used when we encounter a problem in integration of lpeg in luatex)
- function string.split(str,pattern)
- local t = { }
- if #str > 0 then
- local n = 1
- for s in gmatch(str..pattern,"(.-)"..pattern) do
- t[n] = s
- n = n + 1
+-- some code will move to unicode and string
+
+local report = texio and texio.write_nl or print
+
+-- local lpmatch = lpeg.match
+-- local lpprint = lpeg.print
+-- local lpp = lpeg.P
+-- local lpr = lpeg.R
+-- local lps = lpeg.S
+-- local lpc = lpeg.C
+-- local lpb = lpeg.B
+-- local lpv = lpeg.V
+-- local lpcf = lpeg.Cf
+-- local lpcb = lpeg.Cb
+-- local lpcg = lpeg.Cg
+-- local lpct = lpeg.Ct
+-- local lpcs = lpeg.Cs
+-- local lpcc = lpeg.Cc
+-- local lpcmt = lpeg.Cmt
+-- local lpcarg = lpeg.Carg
+
+-- function lpeg.match(l,...) report("LPEG MATCH") lpprint(l) return lpmatch(l,...) end
+
+-- function lpeg.P (l) local p = lpp (l) report("LPEG P =") lpprint(l) return p end
+-- function lpeg.R (l) local p = lpr (l) report("LPEG R =") lpprint(l) return p end
+-- function lpeg.S (l) local p = lps (l) report("LPEG S =") lpprint(l) return p end
+-- function lpeg.C (l) local p = lpc (l) report("LPEG C =") lpprint(l) return p end
+-- function lpeg.B (l) local p = lpb (l) report("LPEG B =") lpprint(l) return p end
+-- function lpeg.V (l) local p = lpv (l) report("LPEG V =") lpprint(l) return p end
+-- function lpeg.Cf (l) local p = lpcf (l) report("LPEG Cf =") lpprint(l) return p end
+-- function lpeg.Cb (l) local p = lpcb (l) report("LPEG Cb =") lpprint(l) return p end
+-- function lpeg.Cg (l) local p = lpcg (l) report("LPEG Cg =") lpprint(l) return p end
+-- function lpeg.Ct (l) local p = lpct (l) report("LPEG Ct =") lpprint(l) return p end
+-- function lpeg.Cs (l) local p = lpcs (l) report("LPEG Cs =") lpprint(l) return p end
+-- function lpeg.Cc (l) local p = lpcc (l) report("LPEG Cc =") lpprint(l) return p end
+-- function lpeg.Cmt (l) local p = lpcmt (l) report("LPEG Cmt =") lpprint(l) return p end
+-- function lpeg.Carg (l) local p = lpcarg(l) report("LPEG Carg =") lpprint(l) return p end
+
+local type, next = type, next
+local byte, char, gmatch, format = string.byte, string.char, string.gmatch, string.format
+
+-- Beware, we predefine a bunch of patterns here and one reason for doing so
+-- is that we get consistent behaviour in some of the visualizers.
+
+lpeg.patterns = lpeg.patterns or { } -- so that we can share
+local patterns = lpeg.patterns
+
+local P, R, S, V, Ct, C, Cs, Cc, Cp, Cmt = lpeg.P, lpeg.R, lpeg.S, lpeg.V, lpeg.Ct, lpeg.C, lpeg.Cs, lpeg.Cc, lpeg.Cp, lpeg.Cmt
+local lpegtype, lpegmatch = lpeg.type, lpeg.match
+
+local anything = P(1)
+local endofstring = P(-1)
+local alwaysmatched = P(true)
+
+patterns.anything = anything
+patterns.endofstring = endofstring
+patterns.beginofstring = alwaysmatched
+patterns.alwaysmatched = alwaysmatched
+
+local digit, sign = R('09'), S('+-')
+local cr, lf, crlf = P("\r"), P("\n"), P("\r\n")
+local newline = crlf + S("\r\n") -- cr + lf
+local escaped = P("\\") * anything
+local squote = P("'")
+local dquote = P('"')
+local space = P(" ")
+
+local utfbom_32_be = P('\000\000\254\255')
+local utfbom_32_le = P('\255\254\000\000')
+local utfbom_16_be = P('\255\254')
+local utfbom_16_le = P('\254\255')
+local utfbom_8 = P('\239\187\191')
+local utfbom = utfbom_32_be + utfbom_32_le
+ + utfbom_16_be + utfbom_16_le
+ + utfbom_8
+local utftype = utfbom_32_be * Cc("utf-32-be") + utfbom_32_le * Cc("utf-32-le")
+ + utfbom_16_be * Cc("utf-16-be") + utfbom_16_le * Cc("utf-16-le")
+ + utfbom_8 * Cc("utf-8") + alwaysmatched * Cc("utf-8") -- assume utf8
+local utfoffset = utfbom_32_be * Cc(4) + utfbom_32_le * Cc(4)
+ + utfbom_16_be * Cc(2) + utfbom_16_le * Cc(2)
+ + utfbom_8 * Cc(3) + Cc(0)
+
+local utf8next = R("\128\191")
+
+patterns.utf8one = R("\000\127")
+patterns.utf8two = R("\194\223") * utf8next
+patterns.utf8three = R("\224\239") * utf8next * utf8next
+patterns.utf8four = R("\240\244") * utf8next * utf8next * utf8next
+patterns.utfbom = utfbom
+patterns.utftype = utftype
+patterns.utfoffset = utfoffset
+
+local utf8char = patterns.utf8one + patterns.utf8two + patterns.utf8three + patterns.utf8four
+local validutf8char = utf8char^0 * endofstring * Cc(true) + Cc(false)
+
+patterns.utf8 = utf8char
+patterns.utf8char = utf8char
+patterns.validutf8 = validutf8char
+patterns.validutf8char = validutf8char
+
+local eol = S("\n\r")
+local spacer = S(" \t\f\v") -- + char(0xc2, 0xa0) if we want utf (cf mail roberto)
+local whitespace = eol + spacer
+local nonspacer = 1 - spacer
+local nonwhitespace = 1 - whitespace
+
+patterns.eol = eol
+patterns.spacer = spacer
+patterns.whitespace = whitespace
+patterns.nonspacer = nonspacer
+patterns.nonwhitespace = nonwhitespace
+
+local stripper = spacer^0 * C((spacer^0 * nonspacer^1)^0) -- from example by roberto
+
+----- collapser = Cs(spacer^0/"" * ((spacer^1 * P(-1) / "") + (spacer^1/" ") + P(1))^0)
+local collapser = Cs(spacer^0/"" * nonspacer^0 * ((spacer^0/" " * nonspacer^1)^0))
+
+patterns.stripper = stripper
+patterns.collapser = collapser
+
+patterns.digit = digit
+patterns.sign = sign
+patterns.cardinal = sign^0 * digit^1
+patterns.integer = sign^0 * digit^1
+patterns.unsigned = digit^0 * P('.') * digit^1
+patterns.float = sign^0 * patterns.unsigned
+patterns.cunsigned = digit^0 * P(',') * digit^1
+patterns.cfloat = sign^0 * patterns.cunsigned
+patterns.number = patterns.float + patterns.integer
+patterns.cnumber = patterns.cfloat + patterns.integer
+patterns.oct = P("0") * R("07")^1
+patterns.octal = patterns.oct
+patterns.HEX = P("0x") * R("09","AF")^1
+patterns.hex = P("0x") * R("09","af")^1
+patterns.hexadecimal = P("0x") * R("09","AF","af")^1
+patterns.lowercase = R("az")
+patterns.uppercase = R("AZ")
+patterns.letter = patterns.lowercase + patterns.uppercase
+patterns.space = space
+patterns.tab = P("\t")
+patterns.spaceortab = patterns.space + patterns.tab
+patterns.newline = newline
+patterns.emptyline = newline^1
+patterns.equal = P("=")
+patterns.comma = P(",")
+patterns.commaspacer = P(",") * spacer^0
+patterns.period = P(".")
+patterns.colon = P(":")
+patterns.semicolon = P(";")
+patterns.underscore = P("_")
+patterns.escaped = escaped
+patterns.squote = squote
+patterns.dquote = dquote
+patterns.nosquote = (escaped + (1-squote))^0
+patterns.nodquote = (escaped + (1-dquote))^0
+patterns.unsingle = (squote/"") * patterns.nosquote * (squote/"") -- will change to C in the middle
+patterns.undouble = (dquote/"") * patterns.nodquote * (dquote/"") -- will change to C in the middle
+patterns.unquoted = patterns.undouble + patterns.unsingle -- more often undouble
+patterns.unspacer = ((patterns.spacer^1)/"")^0
+
+patterns.singlequoted = squote * patterns.nosquote * squote
+patterns.doublequoted = dquote * patterns.nodquote * dquote
+patterns.quoted = patterns.doublequoted + patterns.singlequoted
+
+patterns.propername = R("AZ","az","__") * R("09","AZ","az", "__")^0 * P(-1)
+
+patterns.somecontent = (anything - newline - space)^1 -- (utf8char - newline - space)^1
+patterns.beginline = #(1-newline)
+
+local function anywhere(pattern) --slightly adapted from website
+ return P { P(pattern) + 1 * V(1) }
+end
+
+lpeg.anywhere = anywhere
+
+function lpeg.instringchecker(p)
+ p = anywhere(p)
+ return function(str)
+ return lpegmatch(p,str) and true or false
+ end
+end
+
+function lpeg.splitter(pattern, action)
+ return (((1-P(pattern))^1)/action+1)^0
+end
+
+function lpeg.tsplitter(pattern, action)
+ return Ct((((1-P(pattern))^1)/action+1)^0)
+end
+
+-- probleem: separator can be lpeg and that does not hash too well, but
+-- it's quite okay as the key is then not garbage collected
+
+local splitters_s, splitters_m, splitters_t = { }, { }, { }
+
+local function splitat(separator,single)
+ local splitter = (single and splitters_s[separator]) or splitters_m[separator]
+ if not splitter then
+ separator = P(separator)
+ local other = C((1 - separator)^0)
+ if single then
+ local any = anything
+ splitter = other * (separator * C(any^0) + "") -- ?
+ splitters_s[separator] = splitter
+ else
+ splitter = other * (separator * other)^0
+ splitters_m[separator] = splitter
+ end
+ end
+ return splitter
+end
+
+local function tsplitat(separator)
+ local splitter = splitters_t[separator]
+ if not splitter then
+ splitter = Ct(splitat(separator))
+ splitters_t[separator] = splitter
+ end
+ return splitter
+end
+
+lpeg.splitat = splitat
+lpeg.tsplitat = tsplitat
+
+function string.splitup(str,separator)
+ if not separator then
+ separator = ","
+ end
+ return lpegmatch(splitters_m[separator] or splitat(separator),str)
+end
+
+-- local p = splitat("->",false) print(lpegmatch(p,"oeps->what->more")) -- oeps what more
+-- local p = splitat("->",true) print(lpegmatch(p,"oeps->what->more")) -- oeps what->more
+-- local p = splitat("->",false) print(lpegmatch(p,"oeps")) -- oeps
+-- local p = splitat("->",true) print(lpegmatch(p,"oeps")) -- oeps
+
+local cache = { }
+
+function lpeg.split(separator,str)
+ local c = cache[separator]
+ if not c then
+ c = tsplitat(separator)
+ cache[separator] = c
+ end
+ return lpegmatch(c,str)
+end
+
+function string.split(str,separator)
+ if separator then
+ local c = cache[separator]
+ if not c then
+ c = tsplitat(separator)
+ cache[separator] = c
+ end
+ return lpegmatch(c,str)
+ else
+ return { str }
+ end
+end
+
+local spacing = patterns.spacer^0 * newline -- sort of strip
+local empty = spacing * Cc("")
+local nonempty = Cs((1-spacing)^1) * spacing^-1
+local content = (empty + nonempty)^1
+
+patterns.textline = content
+
+local linesplitter = tsplitat(newline)
+
+patterns.linesplitter = linesplitter
+
+function string.splitlines(str)
+ return lpegmatch(linesplitter,str)
+end
+
+-- lpeg.splitters = cache -- no longer public
+
+local cache = { }
+
+function lpeg.checkedsplit(separator,str)
+ local c = cache[separator]
+ if not c then
+ separator = P(separator)
+ local other = C((1 - separator)^1)
+ c = Ct(separator^0 * other * (separator^1 * other)^0)
+ cache[separator] = c
+ end
+ return lpegmatch(c,str)
+end
+
+function string.checkedsplit(str,separator)
+ local c = cache[separator]
+ if not c then
+ separator = P(separator)
+ local other = C((1 - separator)^1)
+ c = Ct(separator^0 * other * (separator^1 * other)^0)
+ cache[separator] = c
+ end
+ return lpegmatch(c,str)
+end
+
+-- from roberto's site:
+
+local function f2(s) local c1, c2 = byte(s,1,2) return c1 * 64 + c2 - 12416 end
+local function f3(s) local c1, c2, c3 = byte(s,1,3) return (c1 * 64 + c2) * 64 + c3 - 925824 end
+local function f4(s) local c1, c2, c3, c4 = byte(s,1,4) return ((c1 * 64 + c2) * 64 + c3) * 64 + c4 - 63447168 end
+
+local utf8byte = patterns.utf8one/byte + patterns.utf8two/f2 + patterns.utf8three/f3 + patterns.utf8four/f4
+
+patterns.utf8byte = utf8byte
+
+
+
+local cache = { }
+
+function lpeg.stripper(str)
+ if type(str) == "string" then
+ local s = cache[str]
+ if not s then
+ s = Cs(((S(str)^1)/"" + 1)^0)
+ cache[str] = s
+ end
+ return s
+ else
+ return Cs(((str^1)/"" + 1)^0)
+ end
+end
+
+local cache = { }
+
+function lpeg.keeper(str)
+ if type(str) == "string" then
+ local s = cache[str]
+ if not s then
+ s = Cs((((1-S(str))^1)/"" + 1)^0)
+ cache[str] = s
+ end
+ return s
+ else
+ return Cs((((1-str)^1)/"" + 1)^0)
+ end
+end
+
+function lpeg.frontstripper(str) -- or pattern (yet undocumented)
+ return (P(str) + P(true)) * Cs(anything^0)
+end
+
+function lpeg.endstripper(str) -- or pattern (yet undocumented)
+ return Cs((1 - P(str) * endofstring)^0)
+end
+
+-- Just for fun I looked at the used bytecode and
+-- p = (p and p + pp) or pp gets one more (testset).
+
+-- todo: cache when string
+
+function lpeg.replacer(one,two,makefunction,isutf) -- in principle we should sort the keys
+ local pattern
+ local u = isutf and utf8char or 1
+ if type(one) == "table" then
+ local no = #one
+ local p = P(false)
+ if no == 0 then
+ for k, v in next, one do
+ p = p + P(k) / v
+ end
+ pattern = Cs((p + u)^0)
+ elseif no == 1 then
+ local o = one[1]
+ one, two = P(o[1]), o[2]
+ -- pattern = Cs(((1-one)^1 + one/two)^0)
+ pattern = Cs((one/two + u)^0)
+ else
+ for i=1,no do
+ local o = one[i]
+ p = p + P(o[1]) / o[2]
+ end
+ pattern = Cs((p + u)^0)
+ end
+ else
+ pattern = Cs((P(one)/(two or "") + u)^0)
+ end
+ if makefunction then
+ return function(str)
+ return lpegmatch(pattern,str)
+ end
+ else
+ return pattern
+ end
+end
+
+function lpeg.finder(lst,makefunction)
+ local pattern
+ if type(lst) == "table" then
+ pattern = P(false)
+ if #lst == 0 then
+ for k, v in next, lst do
+ pattern = pattern + P(k) -- ignore key, so we can use a replacer table
+ end
+ else
+ for i=1,#lst do
+ pattern = pattern + P(lst[i])
+ end
+ end
+ else
+ pattern = P(lst)
+ end
+ pattern = (1-pattern)^0 * pattern
+ if makefunction then
+ return function(str)
+ return lpegmatch(pattern,str)
+ end
+ else
+ return pattern
+ end
+end
+
+-- print(lpeg.match(lpeg.replacer("e","a"),"test test"))
+-- print(lpeg.match(lpeg.replacer{{"e","a"}},"test test"))
+-- print(lpeg.match(lpeg.replacer({ e = "a", t = "x" }),"test test"))
+
+local splitters_f, splitters_s = { }, { }
+
+function lpeg.firstofsplit(separator) -- always return value
+ local splitter = splitters_f[separator]
+ if not splitter then
+ separator = P(separator)
+ splitter = C((1 - separator)^0)
+ splitters_f[separator] = splitter
+ end
+ return splitter
+end
+
+function lpeg.secondofsplit(separator) -- nil if not split
+ local splitter = splitters_s[separator]
+ if not splitter then
+ separator = P(separator)
+ splitter = (1 - separator)^0 * separator * C(anything^0)
+ splitters_s[separator] = splitter
+ end
+ return splitter
+end
+
+function lpeg.balancer(left,right)
+ left, right = P(left), P(right)
+ return P { left * ((1 - left - right) + V(1))^0 * right }
+end
+
+-- print(1,lpegmatch(lpeg.firstofsplit(":"),"bc:de"))
+-- print(2,lpegmatch(lpeg.firstofsplit(":"),":de")) -- empty
+-- print(3,lpegmatch(lpeg.firstofsplit(":"),"bc"))
+-- print(4,lpegmatch(lpeg.secondofsplit(":"),"bc:de"))
+-- print(5,lpegmatch(lpeg.secondofsplit(":"),"bc:")) -- empty
+-- print(6,lpegmatch(lpeg.secondofsplit(":",""),"bc"))
+-- print(7,lpegmatch(lpeg.secondofsplit(":"),"bc"))
+-- print(9,lpegmatch(lpeg.secondofsplit(":","123"),"bc"))
+
+-- -- slower:
+--
+-- function lpeg.counter(pattern)
+-- local n, pattern = 0, (lpeg.P(pattern)/function() n = n + 1 end + lpeg.anything)^0
+-- return function(str) n = 0 ; lpegmatch(pattern,str) ; return n end
+-- end
+
+local nany = utf8char/""
+
+function lpeg.counter(pattern)
+ pattern = Cs((P(pattern)/" " + nany)^0)
+ return function(str)
+ return #lpegmatch(pattern,str)
+ end
+end
+
+-- utf extensies
+
+local utfcharacters = utf and utf.characters or string.utfcharacters
+local utfgmatch = unicode and unicode.utf8.gmatch
+local utfchar = utf and utf.char or (unicode and unicode.utf8 and unicode.utf8.char)
+
+lpeg.UP = lpeg.P
+
+if utfcharacters then
+
+ function lpeg.US(str)
+ local p = P(false)
+ for uc in utfcharacters(str) do
+ p = p + P(uc)
+ end
+ return p
+ end
+
+
+elseif utfgmatch then
+
+ function lpeg.US(str)
+ local p = P(false)
+ for uc in utfgmatch(str,".") do
+ p = p + P(uc)
+ end
+ return p
+ end
+
+else
+
+ function lpeg.US(str)
+ local p = P(false)
+ local f = function(uc)
+ p = p + P(uc)
+ end
+ lpegmatch((utf8char/f)^0,str)
+ return p
+ end
+
+end
+
+local range = utf8byte * utf8byte + Cc(false) -- utf8byte is already a capture
+
+function lpeg.UR(str,more)
+ local first, last
+ if type(str) == "number" then
+ first = str
+ last = more or first
+ else
+ first, last = lpegmatch(range,str)
+ if not last then
+ return P(str)
+ end
+ end
+ if first == last then
+ return P(str)
+ elseif utfchar and (last - first < 8) then -- a somewhat arbitrary criterium
+ local p = P(false)
+ for i=first,last do
+ p = p + P(utfchar(i))
+ end
+ return p -- nil when invalid range
+ else
+ local f = function(b)
+ return b >= first and b <= last
+ end
+ -- tricky, these nested captures
+ return utf8byte / f -- nil when invalid range
+ end
+end
+
+-- print(lpeg.match(lpeg.Cs((C(lpeg.UR("αω"))/{ ["χ"] = "OEPS" })^0),"αωχαω"))
+
+-- lpeg.print(lpeg.R("ab","cd","gh"))
+-- lpeg.print(lpeg.P("a","b","c"))
+-- lpeg.print(lpeg.S("a","b","c"))
+
+-- print(lpeg.count("äáàa",lpeg.P("á") + lpeg.P("à")))
+-- print(lpeg.count("äáàa",lpeg.UP("áà")))
+-- print(lpeg.count("äáàa",lpeg.US("àá")))
+-- print(lpeg.count("äáàa",lpeg.UR("aá")))
+-- print(lpeg.count("äáàa",lpeg.UR("àá")))
+-- print(lpeg.count("äáàa",lpeg.UR(0x0000,0xFFFF)))
+
+function lpeg.is_lpeg(p)
+ return p and lpegtype(p) == "pattern"
+end
+
+function lpeg.oneof(list,...) -- lpeg.oneof("elseif","else","if","then") -- assume proper order
+ if type(list) ~= "table" then
+ list = { list, ... }
+ end
+ -- table.sort(list) -- longest match first
+ local p = P(list[1])
+ for l=2,#list do
+ p = p + P(list[l])
+ end
+ return p
+end
+
+-- For the moment here, but it might move to utilities. Beware, we need to
+-- have the longest keyword first, so 'aaa' comes beforte 'aa' which is why we
+-- loop back from the end cq. prepend.
+
+local sort = table.sort
+
+local function copyindexed(old)
+ local new = { }
+ for i=1,#old do
+ new[i] = old
+ end
+ return new
+end
+
+local function sortedkeys(tab)
+ local keys, s = { }, 0
+ for key,_ in next, tab do
+ s = s + 1
+ keys[s] = key
+ end
+ sort(keys)
+ return keys
+end
+
+function lpeg.append(list,pp,delayed,checked)
+ local p = pp
+ if #list > 0 then
+ local keys = copyindexed(list)
+ sort(keys)
+ for i=#keys,1,-1 do
+ local k = keys[i]
+ if p then
+ p = P(k) + p
+ else
+ p = P(k)
+ end
+ end
+ elseif delayed then -- hm, it looks like the lpeg parser resolves anyway
+ local keys = sortedkeys(list)
+ if p then
+ for i=1,#keys,1 do
+ local k = keys[i]
+ local v = list[k]
+ p = P(k)/list + p
+ end
+ else
+ for i=1,#keys do
+ local k = keys[i]
+ local v = list[k]
+ if p then
+ p = P(k) + p
+ else
+ p = P(k)
+ end
+ end
+ if p then
+ p = p / list
+ end
+ end
+ elseif checked then
+ -- problem: substitution gives a capture
+ local keys = sortedkeys(list)
+ for i=1,#keys do
+ local k = keys[i]
+ local v = list[k]
+ if p then
+ if k == v then
+ p = P(k) + p
+ else
+ p = P(k)/v + p
+ end
+ else
+ if k == v then
+ p = P(k)
+ else
+ p = P(k)/v
+ end
+ end
+ end
+ else
+ local keys = sortedkeys(list)
+ for i=1,#keys do
+ local k = keys[i]
+ local v = list[k]
+ if p then
+ p = P(k)/v + p
+ else
+ p = P(k)/v
+ end
+ end
+ end
+ return p
+end
+
+-- inspect(lpeg.append({ a = "1", aa = "1", aaa = "1" } ,nil,true))
+-- inspect(lpeg.append({ ["degree celsius"] = "1", celsius = "1", degree = "1" } ,nil,true))
+
+-- function lpeg.exact_match(words,case_insensitive)
+-- local pattern = concat(words)
+-- if case_insensitive then
+-- local pattern = S(upper(characters)) + S(lower(characters))
+-- local list = { }
+-- for i=1,#words do
+-- list[lower(words[i])] = true
+-- end
+-- return Cmt(pattern^1, function(_,i,s)
+-- return list[lower(s)] and i
+-- end)
+-- else
+-- local pattern = S(concat(words))
+-- local list = { }
+-- for i=1,#words do
+-- list[words[i]] = true
+-- end
+-- return Cmt(pattern^1, function(_,i,s)
+-- return list[s] and i
+-- end)
+-- end
+-- end
+
+-- experiment:
+
+local function make(t)
+ local p
+ local keys = sortedkeys(t)
+ for i=1,#keys do
+ local k = keys[i]
+ local v = t[k]
+ if not p then
+ if next(v) then
+ p = P(k) * make(v)
+ else
+ p = P(k)
+ end
+ else
+ if next(v) then
+ p = p + P(k) * make(v)
+ else
+ p = p + P(k)
end
end
- return t
end
+ return p
+end
+function lpeg.utfchartabletopattern(list) -- goes to util-lpg
+ local tree = { }
+ for i=1,#list do
+ local t = tree
+ for c in gmatch(list[i],".") do
+ if not t[c] then
+ t[c] = { }
+ end
+ t = t[c]
+ end
+ end
+ return make(tree)
end
+-- inspect ( lpeg.utfchartabletopattern {
+-- utfchar(0x00A0), -- nbsp
+-- utfchar(0x2000), -- enquad
+-- utfchar(0x2001), -- emquad
+-- utfchar(0x2002), -- enspace
+-- utfchar(0x2003), -- emspace
+-- utfchar(0x2004), -- threeperemspace
+-- utfchar(0x2005), -- fourperemspace
+-- utfchar(0x2006), -- sixperemspace
+-- utfchar(0x2007), -- figurespace
+-- utfchar(0x2008), -- punctuationspace
+-- utfchar(0x2009), -- breakablethinspace
+-- utfchar(0x200A), -- hairspace
+-- utfchar(0x200B), -- zerowidthspace
+-- utfchar(0x202F), -- narrownobreakspace
+-- utfchar(0x205F), -- math thinspace
+-- } )
+
+-- a few handy ones:
+--
+-- faster than find(str,"[\n\r]") when match and # > 7 and always faster when # > 3
+
+patterns.containseol = lpeg.finder(eol) -- (1-eol)^0 * eol
+
+
+end -- of closure
+
+do -- create closure to overcome 200 locals limit
+
+if not modules then modules = { } end modules ['l-functions'] = {
+ version = 1.001,
+ comment = "companion to luat-lib.mkiv",
+ author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
+ copyright = "PRAGMA ADE / ConTeXt Development Team",
+ license = "see context related readme files"
+}
+
+functions = functions or { }
+
+function functions.dummy() end
+
+
+end -- of closure
+
+do -- create closure to overcome 200 locals limit
+
+if not modules then modules = { } end modules ['l-string'] = {
+ version = 1.001,
+ comment = "companion to luat-lib.mkiv",
+ author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
+ copyright = "PRAGMA ADE / ConTeXt Development Team",
+ license = "see context related readme files"
+}
+
+local string = string
+local sub, gmatch, format, char, byte, rep, lower = string.sub, string.gmatch, string.format, string.char, string.byte, string.rep, string.lower
+local lpegmatch, patterns = lpeg.match, lpeg.patterns
+local P, S, C, Ct, Cc, Cs = lpeg.P, lpeg.S, lpeg.C, lpeg.Ct, lpeg.Cc, lpeg.Cs
+
+-- Some functions are already defined in l-lpeg and maybe some from here will
+-- move there (unless we also expose caches).
+
+-- if not string.split then
+--
+-- function string.split(str,pattern)
+-- local t = { }
+-- if #str > 0 then
+-- local n = 1
+-- for s in gmatch(str..pattern,"(.-)"..pattern) do
+-- t[n] = s
+-- n = n + 1
+-- end
+-- end
+-- return t
+-- end
+--
+-- end
+
+-- function string.unquoted(str)
+-- return (gsub(str,"^([\"\'])(.*)%1$","%2")) -- interesting pattern
+-- end
+
+local unquoted = patterns.squote * C(patterns.nosquote) * patterns.squote
+ + patterns.dquote * C(patterns.nodquote) * patterns.dquote
+
function string.unquoted(str)
- return (gsub(str,"^([\"\'])(.*)%1$","%2"))
+ return lpegmatch(unquoted,str) or str
end
+-- print(string.unquoted("test"))
+-- print(string.unquoted([["t\"est"]]))
+-- print(string.unquoted([["t\"est"x]]))
+-- print(string.unquoted("\'test\'"))
+-- print(string.unquoted('"test"'))
+-- print(string.unquoted('"test"'))
function string.quoted(str)
return format("%q",str) -- always "
@@ -118,65 +1034,112 @@ function string.limit(str,n,sentinel) -- not utf proof
end
end
-local space = S(" \t\v\n")
-local nospace = 1 - space
-local stripper = space^0 * C((space^0 * nospace^1)^0) -- roberto's code
+local stripper = patterns.stripper
+local collapser = patterns.collapser
function string.strip(str)
return lpegmatch(stripper,str) or ""
end
+function string.collapsespaces(str)
+ return lpegmatch(collapser,str) or ""
+end
+
+-- function string.is_empty(str)
+-- return not find(str,"%S")
+-- end
+
+local pattern = P(" ")^0 * P(-1)
+
function string.is_empty(str)
- return not find(str,"%S")
+ if str == "" then
+ return true
+ else
+ return lpegmatch(pattern,str) and true or false
+ end
end
-local patterns_escapes = {
- ["%"] = "%%",
- ["."] = "%.",
- ["+"] = "%+", ["-"] = "%-", ["*"] = "%*",
- ["["] = "%[", ["]"] = "%]",
- ["("] = "%(", [")"] = "%)",
- -- ["{"] = "%{", ["}"] = "%}"
- -- ["^"] = "%^", ["$"] = "%$",
-}
-local simple_escapes = {
- ["-"] = "%-",
- ["."] = "%.",
- ["?"] = ".",
- ["*"] = ".*",
-}
+-- if not string.escapedpattern then
+--
+-- local patterns_escapes = {
+-- ["%"] = "%%",
+-- ["."] = "%.",
+-- ["+"] = "%+", ["-"] = "%-", ["*"] = "%*",
+-- ["["] = "%[", ["]"] = "%]",
+-- ["("] = "%(", [")"] = "%)",
+-- -- ["{"] = "%{", ["}"] = "%}"
+-- -- ["^"] = "%^", ["$"] = "%$",
+-- }
+--
+-- local simple_escapes = {
+-- ["-"] = "%-",
+-- ["."] = "%.",
+-- ["?"] = ".",
+-- ["*"] = ".*",
+-- }
+--
+-- function string.escapedpattern(str,simple)
+-- return (gsub(str,".",simple and simple_escapes or patterns_escapes))
+-- end
+--
+-- function string.topattern(str,lowercase,strict)
+-- if str == "" then
+-- return ".*"
+-- else
+-- str = gsub(str,".",simple_escapes)
+-- if lowercase then
+-- str = lower(str)
+-- end
+-- if strict then
+-- return "^" .. str .. "$"
+-- else
+-- return str
+-- end
+-- end
+-- end
+--
+-- end
+
+--- needs checking
+
+local anything = patterns.anything
+local allescapes = Cc("%") * S(".-+%?()[]*") -- also {} and ^$ ?
+local someescapes = Cc("%") * S(".-+%()[]") -- also {} and ^$ ?
+local matchescapes = Cc(".") * S("*?") -- wildcard and single match
+
+local pattern_a = Cs ( ( allescapes + anything )^0 )
+local pattern_b = Cs ( ( someescapes + matchescapes + anything )^0 )
+local pattern_c = Cs ( Cc("^") * ( someescapes + matchescapes + anything )^0 * Cc("$") )
function string.escapedpattern(str,simple)
- return (gsub(str,".",simple and simple_escapes or patterns_escapes))
+ return lpegmatch(simple and pattern_b or pattern_a,str)
end
function string.topattern(str,lowercase,strict)
if str == "" then
return ".*"
+ elseif strict then
+ str = lpegmatch(pattern_c,str)
else
- str = gsub(str,".",simple_escapes)
- if lowercase then
- str = lower(str)
- end
- if strict then
- return "^" .. str .. "$"
- else
- return str
- end
+ str = lpegmatch(pattern_b,str)
+ end
+ if lowercase then
+ return lower(str)
+ else
+ return str
end
end
+-- print(string.escapedpattern("12+34*.tex",false))
+-- print(string.escapedpattern("12+34*.tex",true))
+-- print(string.topattern ("12+34*.tex",false,false))
+-- print(string.topattern ("12+34*.tex",false,true))
function string.valid(str,default)
return (type(str) == "string" and str ~= "" and str) or default or nil
end
--- obsolete names:
-
-string.quote = string.quoted
-string.unquote = string.unquoted
-
-- handy fallback
string.itself = function(s) return s end
@@ -189,6 +1152,19 @@ function string.totable(str)
return lpegmatch(pattern,str)
end
+-- handy from within tex:
+
+local replacer = lpeg.replacer("@","%%") -- Watch the escaped % in lpeg!
+
+function string.tformat(fmt,...)
+ return format(lpegmatch(replacer,fmt),...)
+end
+
+-- obsolete names:
+
+string.quote = string.quoted
+string.unquote = string.unquoted
+
end -- of closure
@@ -202,68 +1178,23 @@ if not modules then modules = { } end modules ['l-table'] = {
license = "see context related readme files"
}
-local type, next, tostring, tonumber, ipairs = type, next, tostring, tonumber, ipairs
+local type, next, tostring, tonumber, ipairs, select = type, next, tostring, tonumber, ipairs, select
local table, string = table, string
local concat, sort, insert, remove = table.concat, table.sort, table.insert, table.remove
-local format, find, gsub, lower, dump, match = string.format, string.find, string.gsub, string.lower, string.dump, string.match
+local format, lower, dump = string.format, string.lower, string.dump
local getmetatable, setmetatable = getmetatable, setmetatable
local getinfo = debug.getinfo
-
--- Starting with version 5.2 Lua no longer provide ipairs, which makes
--- sense. As we already used the for loop and # in most places the
--- impact on ConTeXt was not that large; the remaining ipairs already
--- have been replaced. In a similar fashion we also hardly used pairs.
---
--- Hm, actually ipairs was retained, but we no longer use it anyway.
---
--- Just in case, we provide the fallbacks as discussed in Programming
--- in Lua (http://www.lua.org/pil/7.3.html):
-
-if not ipairs then
-
- -- for k, v in ipairs(t) do ... end
- -- for k=1,#t do local v = t[k] ... end
-
- local function iterate(a,i)
- i = i + 1
- local v = a[i]
- if v ~= nil then
- return i, v --, nil
- end
- end
-
- function ipairs(a)
- return iterate, a, 0
- end
-
-end
-
-if not pairs then
-
- -- for k, v in pairs(t) do ... end
- -- for k, v in next, t do ... end
-
- function pairs(t)
- return next, t -- , nil
- end
-
-end
-
--- Also, unpack has been moved to the table table, and for compatiility
--- reasons we provide both now.
-
-if not table.unpack then
- table.unpack = _G.unpack
-elseif not unpack then
- _G.unpack = table.unpack
-end
+local lpegmatch, patterns = lpeg.match, lpeg.patterns
+local floor = math.floor
-- extra functions, some might go (when not used)
+local stripper = patterns.stripper
+
function table.strip(tab)
local lst, l = { }, 0
for i=1,#tab do
- local s = gsub(tab[i],"^%s*(.-)%s*$","%1")
+ local s = lpegmatch(stripper,tab[i]) or ""
if s == "" then
-- skip this one
else
@@ -372,7 +1303,7 @@ local function sortedhash(t)
end
table.sortedhash = sortedhash
-table.sortedpairs = sortedhash
+table.sortedpairs = sortedhash -- obsolete
function table.append(t,list)
local n = #t
@@ -396,31 +1327,63 @@ function table.prepend(t, list)
return t
end
+-- function table.merge(t, ...) -- first one is target
+-- t = t or { }
+-- local lst = { ... }
+-- for i=1,#lst do
+-- for k, v in next, lst[i] do
+-- t[k] = v
+-- end
+-- end
+-- return t
+-- end
+
function table.merge(t, ...) -- first one is target
t = t or { }
- local lst = { ... }
- for i=1,#lst do
- for k, v in next, lst[i] do
+ for i=1,select("#",...) do
+ for k, v in next, (select(i,...)) do
t[k] = v
end
end
return t
end
+-- function table.merged(...)
+-- local tmp, lst = { }, { ... }
+-- for i=1,#lst do
+-- for k, v in next, lst[i] do
+-- tmp[k] = v
+-- end
+-- end
+-- return tmp
+-- end
+
function table.merged(...)
- local tmp, lst = { }, { ... }
- for i=1,#lst do
- for k, v in next, lst[i] do
- tmp[k] = v
+ local t = { }
+ for i=1,select("#",...) do
+ for k, v in next, (select(i,...)) do
+ t[k] = v
end
end
- return tmp
+ return t
end
+-- function table.imerge(t, ...)
+-- local lst, nt = { ... }, #t
+-- for i=1,#lst do
+-- local nst = lst[i]
+-- for j=1,#nst do
+-- nt = nt + 1
+-- t[nt] = nst[j]
+-- end
+-- end
+-- return t
+-- end
+
function table.imerge(t, ...)
- local lst, nt = { ... }, #t
- for i=1,#lst do
- local nst = lst[i]
+ local nt = #t
+ for i=1,select("#",...) do
+ local nst = select(i,...)
for j=1,#nst do
nt = nt + 1
t[nt] = nst[j]
@@ -429,10 +1392,22 @@ function table.imerge(t, ...)
return t
end
+-- function table.imerged(...)
+-- local tmp, ntmp, lst = { }, 0, {...}
+-- for i=1,#lst do
+-- local nst = lst[i]
+-- for j=1,#nst do
+-- ntmp = ntmp + 1
+-- tmp[ntmp] = nst[j]
+-- end
+-- end
+-- return tmp
+-- end
+
function table.imerged(...)
- local tmp, ntmp, lst = { }, 0, {...}
- for i=1,#lst do
- local nst = lst[i]
+ local tmp, ntmp = { }, 0
+ for i=1,select("#",...) do
+ local nst = select(i,...)
for j=1,#nst do
ntmp = ntmp + 1
tmp[ntmp] = nst[j]
@@ -444,7 +1419,7 @@ end
local function fastcopy(old,metatabletoo) -- fast one
if old then
local new = { }
- for k,v in next, old do
+ for k, v in next, old do
if type(v) == "table" then
new[k] = fastcopy(v,metatabletoo) -- was just table.copy
else
@@ -498,7 +1473,7 @@ end
table.fastcopy = fastcopy
table.copy = copy
-function table.derive(parent)
+function table.derive(parent) -- for the moment not public
local child = { }
if parent then
setmetatable(child,{ __index = parent })
@@ -579,6 +1554,13 @@ end
-- problem: there no good number_to_string converter with the best resolution
+-- probably using .. is faster than format
+-- maybe split in a few cases (yes/no hexify)
+
+-- todo: %g faster on numbers than %s
+
+local propername = patterns.propername -- was find(name,"^%a[%w%_]*$")
+
local function dummy() end
local function do_serialize(root,name,depth,level,indexed)
@@ -588,14 +1570,14 @@ local function do_serialize(root,name,depth,level,indexed)
handle(format("%s{",depth))
else
local tn = type(name)
- if tn == "number" then -- or find(k,"^%d+$") then
+ if tn == "number" then
if hexify then
handle(format("%s[0x%04X]={",depth,name))
else
handle(format("%s[%s]={",depth,name))
end
elseif tn == "string" then
- if noquotes and not reserved[name] and find(name,"^%a[%w%_]*$") then
+ if noquotes and not reserved[name] and lpegmatch(propername,name) then
handle(format("%s%s={",depth,name))
else
handle(format("%s[%q]={",depth,name))
@@ -621,7 +1603,6 @@ local function do_serialize(root,name,depth,level,indexed)
if compact then
last = #root
for k=1,last do
--- if not root[k] then
if root[k] == nil then
last = k - 1
break
@@ -667,7 +1648,7 @@ local function do_serialize(root,name,depth,level,indexed)
handle(format("%s %s,",depth,tostring(v)))
elseif t == "function" then
if functions then
- handle(format('%s loadstring(%q),',depth,dump(v)))
+ handle(format('%s load(%q),',depth,dump(v)))
else
handle(format('%s "function",',depth))
end
@@ -679,7 +1660,7 @@ local function do_serialize(root,name,depth,level,indexed)
handle(format("%s __p__=nil,",depth))
end
elseif t == "number" then
- if tk == "number" then -- or find(k,"^%d+$") then
+ if tk == "number" then
if hexify then
handle(format("%s [0x%04X]=0x%04X,",depth,k,v))
else
@@ -691,7 +1672,7 @@ local function do_serialize(root,name,depth,level,indexed)
else
handle(format("%s [%s]=%s,",depth,tostring(k),v)) -- %.99g
end
- elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then
+ elseif noquotes and not reserved[k] and lpegmatch(propername,k) then
if hexify then
handle(format("%s %s=0x%04X,",depth,k,v))
else
@@ -706,7 +1687,7 @@ local function do_serialize(root,name,depth,level,indexed)
end
elseif t == "string" then
if reduce and tonumber(v) then
- if tk == "number" then -- or find(k,"^%d+$") then
+ if tk == "number" then
if hexify then
handle(format("%s [0x%04X]=%s,",depth,k,v))
else
@@ -714,13 +1695,13 @@ local function do_serialize(root,name,depth,level,indexed)
end
elseif tk == "boolean" then
handle(format("%s [%s]=%s,",depth,tostring(k),v))
- elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then
+ elseif noquotes and not reserved[k] and lpegmatch(propername,k) then
handle(format("%s %s=%s,",depth,k,v))
else
handle(format("%s [%q]=%s,",depth,k,v))
end
else
- if tk == "number" then -- or find(k,"^%d+$") then
+ if tk == "number" then
if hexify then
handle(format("%s [0x%04X]=%q,",depth,k,v))
else
@@ -728,7 +1709,7 @@ local function do_serialize(root,name,depth,level,indexed)
end
elseif tk == "boolean" then
handle(format("%s [%s]=%q,",depth,tostring(k),v))
- elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then
+ elseif noquotes and not reserved[k] and lpegmatch(propername,k) then
handle(format("%s %s=%q,",depth,k,v))
else
handle(format("%s [%q]=%q,",depth,k,v))
@@ -736,7 +1717,7 @@ local function do_serialize(root,name,depth,level,indexed)
end
elseif t == "table" then
if not next(v) then
- if tk == "number" then -- or find(k,"^%d+$") then
+ if tk == "number" then
if hexify then
handle(format("%s [0x%04X]={},",depth,k))
else
@@ -744,7 +1725,7 @@ local function do_serialize(root,name,depth,level,indexed)
end
elseif tk == "boolean" then
handle(format("%s [%s]={},",depth,tostring(k)))
- elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then
+ elseif noquotes and not reserved[k] and lpegmatch(propername,k) then
handle(format("%s %s={},",depth,k))
else
handle(format("%s [%q]={},",depth,k))
@@ -752,15 +1733,15 @@ local function do_serialize(root,name,depth,level,indexed)
elseif inline then
local st = simple_table(v)
if st then
- if tk == "number" then -- or find(k,"^%d+$") then
+ if tk == "number" then
if hexify then
handle(format("%s [0x%04X]={ %s },",depth,k,concat(st,", ")))
else
handle(format("%s [%s]={ %s },",depth,k,concat(st,", ")))
end
- elseif tk == "boolean" then -- or find(k,"^%d+$") then
+ elseif tk == "boolean" then
handle(format("%s [%s]={ %s },",depth,tostring(k),concat(st,", ")))
- elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then
+ elseif noquotes and not reserved[k] and lpegmatch(propername,k) then
handle(format("%s %s={ %s },",depth,k,concat(st,", ")))
else
handle(format("%s [%q]={ %s },",depth,k,concat(st,", ")))
@@ -772,15 +1753,15 @@ local function do_serialize(root,name,depth,level,indexed)
do_serialize(v,k,depth,level+1)
end
elseif t == "boolean" then
- if tk == "number" then -- or find(k,"^%d+$") then
+ if tk == "number" then
if hexify then
handle(format("%s [0x%04X]=%s,",depth,k,tostring(v)))
else
handle(format("%s [%s]=%s,",depth,k,tostring(v)))
end
- elseif tk == "boolean" then -- or find(k,"^%d+$") then
+ elseif tk == "boolean" then
handle(format("%s [%s]=%s,",depth,tostring(k),tostring(v)))
- elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then
+ elseif noquotes and not reserved[k] and lpegmatch(propername,k) then
handle(format("%s %s=%s,",depth,k,tostring(v)))
else
handle(format("%s [%q]=%s,",depth,k,tostring(v)))
@@ -789,30 +1770,30 @@ local function do_serialize(root,name,depth,level,indexed)
if functions then
local f = getinfo(v).what == "C" and dump(dummy) or dump(v)
-- local f = getinfo(v).what == "C" and dump(function(...) return v(...) end) or dump(v)
- if tk == "number" then -- or find(k,"^%d+$") then
+ if tk == "number" then
if hexify then
- handle(format("%s [0x%04X]=loadstring(%q),",depth,k,f))
+ handle(format("%s [0x%04X]=load(%q),",depth,k,f))
else
- handle(format("%s [%s]=loadstring(%q),",depth,k,f))
+ handle(format("%s [%s]=load(%q),",depth,k,f))
end
elseif tk == "boolean" then
- handle(format("%s [%s]=loadstring(%q),",depth,tostring(k),f))
- elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then
- handle(format("%s %s=loadstring(%q),",depth,k,f))
+ handle(format("%s [%s]=load(%q),",depth,tostring(k),f))
+ elseif noquotes and not reserved[k] and lpegmatch(propername,k) then
+ handle(format("%s %s=load(%q),",depth,k,f))
else
- handle(format("%s [%q]=loadstring(%q),",depth,k,f))
+ handle(format("%s [%q]=load(%q),",depth,k,f))
end
end
else
- if tk == "number" then -- or find(k,"^%d+$") then
+ if tk == "number" then
if hexify then
handle(format("%s [0x%04X]=%q,",depth,k,tostring(v)))
else
handle(format("%s [%s]=%q,",depth,k,tostring(v)))
end
- elseif tk == "boolean" then -- or find(k,"^%d+$") then
+ elseif tk == "boolean" then
handle(format("%s [%s]=%q,",depth,tostring(k),tostring(v)))
- elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then
+ elseif noquotes and not reserved[k] and lpegmatch(propername,k) then
handle(format("%s %s=%q,",depth,k,tostring(v)))
else
handle(format("%s [%q]=%q,",depth,k,tostring(v)))
@@ -892,6 +1873,14 @@ local function serialize(_handle,root,name,specification) -- handle wins
handle("}")
end
+-- name:
+--
+-- true : return { }
+-- false : { }
+-- nil : t = { }
+-- string : string = { }
+-- "return" : return { }
+-- number : [number] = { }
function table.serialize(root,name,specification)
local t, n = { }, 0
@@ -980,7 +1969,7 @@ table.flattened = flattened
local function unnest(t,f) -- only used in mk, for old times sake
if not f then -- and only relevant for token lists
- f = { }
+ f = { } -- this one can become obsolete
end
for i=1,#t do
local v = t[i]
@@ -1009,7 +1998,7 @@ local function are_equal(a,b,n,m) -- indexed
local ai, bi = a[i], b[i]
if ai==bi then
-- same
- elseif type(ai)=="table" and type(bi)=="table" then
+ elseif type(ai) == "table" and type(bi) == "table" then
if not are_equal(ai,bi) then
return false
end
@@ -1044,10 +2033,10 @@ table.are_equal = are_equal
-- maybe also make a combined one
-function table.compact(t)
+function table.compact(t) -- remove empty tables, assumes subtables
if t then
- for k,v in next, t do
- if not next(v) then
+ for k, v in next, t do
+ if not next(v) then -- no type checking
t[k] = nil
end
end
@@ -1086,7 +2075,7 @@ function table.swapped(t,s) -- hash
return n
end
-function table.mirror(t) -- hash
+function table.mirrored(t) -- hash
local n = { }
for k, v in next, t do
n[v] = k
@@ -1109,6 +2098,17 @@ function table.reversed(t)
end
end
+function table.reverse(t)
+ if t then
+ local n = #t
+ for i=1,floor(n/2) do
+ local j = n - i + 1
+ t[i], t[j] = t[j], t[i]
+ end
+ return t
+ end
+end
+
function table.sequenced(t,sep) -- hash only
if t then
local s, n = { }, 0
@@ -1196,848 +2196,6 @@ end -- of closure
do -- create closure to overcome 200 locals limit
-if not modules then modules = { } end modules ['l-lpeg'] = {
- version = 1.001,
- comment = "companion to luat-lib.mkiv",
- author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
- copyright = "PRAGMA ADE / ConTeXt Development Team",
- license = "see context related readme files"
-}
-
-
--- a new lpeg fails on a #(1-P(":")) test and really needs a + P(-1)
-
-local lpeg = require("lpeg")
-
--- tracing (only used when we encounter a problem in integration of lpeg in luatex)
-
--- some code will move to unicode and string
-
-local report = texio and texio.write_nl or print
-
--- local lpmatch = lpeg.match
--- local lpprint = lpeg.print
--- local lpp = lpeg.P
--- local lpr = lpeg.R
--- local lps = lpeg.S
--- local lpc = lpeg.C
--- local lpb = lpeg.B
--- local lpv = lpeg.V
--- local lpcf = lpeg.Cf
--- local lpcb = lpeg.Cb
--- local lpcg = lpeg.Cg
--- local lpct = lpeg.Ct
--- local lpcs = lpeg.Cs
--- local lpcc = lpeg.Cc
--- local lpcmt = lpeg.Cmt
--- local lpcarg = lpeg.Carg
-
--- function lpeg.match(l,...) report("LPEG MATCH") lpprint(l) return lpmatch(l,...) end
-
--- function lpeg.P (l) local p = lpp (l) report("LPEG P =") lpprint(l) return p end
--- function lpeg.R (l) local p = lpr (l) report("LPEG R =") lpprint(l) return p end
--- function lpeg.S (l) local p = lps (l) report("LPEG S =") lpprint(l) return p end
--- function lpeg.C (l) local p = lpc (l) report("LPEG C =") lpprint(l) return p end
--- function lpeg.B (l) local p = lpb (l) report("LPEG B =") lpprint(l) return p end
--- function lpeg.V (l) local p = lpv (l) report("LPEG V =") lpprint(l) return p end
--- function lpeg.Cf (l) local p = lpcf (l) report("LPEG Cf =") lpprint(l) return p end
--- function lpeg.Cb (l) local p = lpcb (l) report("LPEG Cb =") lpprint(l) return p end
--- function lpeg.Cg (l) local p = lpcg (l) report("LPEG Cg =") lpprint(l) return p end
--- function lpeg.Ct (l) local p = lpct (l) report("LPEG Ct =") lpprint(l) return p end
--- function lpeg.Cs (l) local p = lpcs (l) report("LPEG Cs =") lpprint(l) return p end
--- function lpeg.Cc (l) local p = lpcc (l) report("LPEG Cc =") lpprint(l) return p end
--- function lpeg.Cmt (l) local p = lpcmt (l) report("LPEG Cmt =") lpprint(l) return p end
--- function lpeg.Carg (l) local p = lpcarg(l) report("LPEG Carg =") lpprint(l) return p end
-
-local type, next = type, next
-local byte, char, gmatch, format = string.byte, string.char, string.gmatch, string.format
-
--- Beware, we predefine a bunch of patterns here and one reason for doing so
--- is that we get consistent behaviour in some of the visualizers.
-
-lpeg.patterns = lpeg.patterns or { } -- so that we can share
-local patterns = lpeg.patterns
-
-local P, R, S, V, Ct, C, Cs, Cc, Cp = lpeg.P, lpeg.R, lpeg.S, lpeg.V, lpeg.Ct, lpeg.C, lpeg.Cs, lpeg.Cc, lpeg.Cp
-local lpegtype, lpegmatch = lpeg.type, lpeg.match
-
-local utfcharacters = string.utfcharacters
-local utfgmatch = unicode and unicode.utf8.gmatch
-
-local anything = P(1)
-local endofstring = P(-1)
-local alwaysmatched = P(true)
-
-patterns.anything = anything
-patterns.endofstring = endofstring
-patterns.beginofstring = alwaysmatched
-patterns.alwaysmatched = alwaysmatched
-
-local digit, sign = R('09'), S('+-')
-local cr, lf, crlf = P("\r"), P("\n"), P("\r\n")
-local newline = crlf + S("\r\n") -- cr + lf
-local escaped = P("\\") * anything
-local squote = P("'")
-local dquote = P('"')
-local space = P(" ")
-
-local utfbom_32_be = P('\000\000\254\255')
-local utfbom_32_le = P('\255\254\000\000')
-local utfbom_16_be = P('\255\254')
-local utfbom_16_le = P('\254\255')
-local utfbom_8 = P('\239\187\191')
-local utfbom = utfbom_32_be + utfbom_32_le
- + utfbom_16_be + utfbom_16_le
- + utfbom_8
-local utftype = utfbom_32_be / "utf-32-be" + utfbom_32_le / "utf-32-le"
- + utfbom_16_be / "utf-16-be" + utfbom_16_le / "utf-16-le"
- + utfbom_8 / "utf-8" + alwaysmatched / "unknown"
-
-local utf8next = R("\128\191")
-
-patterns.utf8one = R("\000\127")
-patterns.utf8two = R("\194\223") * utf8next
-patterns.utf8three = R("\224\239") * utf8next * utf8next
-patterns.utf8four = R("\240\244") * utf8next * utf8next * utf8next
-patterns.utfbom = utfbom
-patterns.utftype = utftype
-
-local utf8char = patterns.utf8one + patterns.utf8two + patterns.utf8three + patterns.utf8four
-local validutf8char = utf8char^0 * endofstring * Cc(true) + Cc(false)
-
-patterns.utf8 = utf8char
-patterns.utf8char = utf8char
-patterns.validutf8 = validutf8char
-patterns.validutf8char = validutf8char
-
-local eol = S("\n\r")
-local spacer = S(" \t\f\v") -- + char(0xc2, 0xa0) if we want utf (cf mail roberto)
-local whitespace = eol + spacer
-
-patterns.digit = digit
-patterns.sign = sign
-patterns.cardinal = sign^0 * digit^1
-patterns.integer = sign^0 * digit^1
-patterns.unsigned = digit^0 * P('.') * digit^1
-patterns.float = sign^0 * patterns.unsigned
-patterns.cunsigned = digit^0 * P(',') * digit^1
-patterns.cfloat = sign^0 * patterns.cunsigned
-patterns.number = patterns.float + patterns.integer
-patterns.cnumber = patterns.cfloat + patterns.integer
-patterns.oct = P("0") * R("07")^1
-patterns.octal = patterns.oct
-patterns.HEX = P("0x") * R("09","AF")^1
-patterns.hex = P("0x") * R("09","af")^1
-patterns.hexadecimal = P("0x") * R("09","AF","af")^1
-patterns.lowercase = R("az")
-patterns.uppercase = R("AZ")
-patterns.letter = patterns.lowercase + patterns.uppercase
-patterns.space = space
-patterns.tab = P("\t")
-patterns.spaceortab = patterns.space + patterns.tab
-patterns.eol = eol
-patterns.spacer = spacer
-patterns.whitespace = whitespace
-patterns.newline = newline
-patterns.emptyline = newline^1
-patterns.nonspacer = 1 - spacer
-patterns.nonwhitespace = 1 - whitespace
-patterns.equal = P("=")
-patterns.comma = P(",")
-patterns.commaspacer = P(",") * spacer^0
-patterns.period = P(".")
-patterns.colon = P(":")
-patterns.semicolon = P(";")
-patterns.underscore = P("_")
-patterns.escaped = escaped
-patterns.squote = squote
-patterns.dquote = dquote
-patterns.nosquote = (escaped + (1-squote))^0
-patterns.nodquote = (escaped + (1-dquote))^0
-patterns.unsingle = (squote/"") * patterns.nosquote * (squote/"")
-patterns.undouble = (dquote/"") * patterns.nodquote * (dquote/"")
-patterns.unquoted = patterns.undouble + patterns.unsingle -- more often undouble
-patterns.unspacer = ((patterns.spacer^1)/"")^0
-
-patterns.singlequoted = squote * patterns.nosquote * squote
-patterns.doublequoted = dquote * patterns.nodquote * dquote
-patterns.quoted = patterns.doublequoted + patterns.singlequoted
-
-patterns.somecontent = (anything - newline - space)^1 -- (utf8char - newline - space)^1
-patterns.beginline = #(1-newline)
-
--- print(string.unquoted("test"))
--- print(string.unquoted([["t\"est"]]))
--- print(string.unquoted([["t\"est"x]]))
--- print(string.unquoted("\'test\'"))
--- print(string.unquoted('"test"'))
--- print(string.unquoted('"test"'))
-
-local function anywhere(pattern) --slightly adapted from website
- return P { P(pattern) + 1 * V(1) }
-end
-
-lpeg.anywhere = anywhere
-
-function lpeg.instringchecker(p)
- p = anywhere(p)
- return function(str)
- return lpegmatch(p,str) and true or false
- end
-end
-
-function lpeg.splitter(pattern, action)
- return (((1-P(pattern))^1)/action+1)^0
-end
-
-function lpeg.tsplitter(pattern, action)
- return Ct((((1-P(pattern))^1)/action+1)^0)
-end
-
--- probleem: separator can be lpeg and that does not hash too well, but
--- it's quite okay as the key is then not garbage collected
-
-local splitters_s, splitters_m, splitters_t = { }, { }, { }
-
-local function splitat(separator,single)
- local splitter = (single and splitters_s[separator]) or splitters_m[separator]
- if not splitter then
- separator = P(separator)
- local other = C((1 - separator)^0)
- if single then
- local any = anything
- splitter = other * (separator * C(any^0) + "") -- ?
- splitters_s[separator] = splitter
- else
- splitter = other * (separator * other)^0
- splitters_m[separator] = splitter
- end
- end
- return splitter
-end
-
-local function tsplitat(separator)
- local splitter = splitters_t[separator]
- if not splitter then
- splitter = Ct(splitat(separator))
- splitters_t[separator] = splitter
- end
- return splitter
-end
-
-lpeg.splitat = splitat
-lpeg.tsplitat = tsplitat
-
-function string.splitup(str,separator)
- if not separator then
- separator = ","
- end
- return lpegmatch(splitters_m[separator] or splitat(separator),str)
-end
-
-
-local cache = { }
-
-function lpeg.split(separator,str)
- local c = cache[separator]
- if not c then
- c = tsplitat(separator)
- cache[separator] = c
- end
- return lpegmatch(c,str)
-end
-
-function string.split(str,separator)
- if separator then
- local c = cache[separator]
- if not c then
- c = tsplitat(separator)
- cache[separator] = c
- end
- return lpegmatch(c,str)
- else
- return { str }
- end
-end
-
-local spacing = patterns.spacer^0 * newline -- sort of strip
-local empty = spacing * Cc("")
-local nonempty = Cs((1-spacing)^1) * spacing^-1
-local content = (empty + nonempty)^1
-
-patterns.textline = content
-
-
-local linesplitter = tsplitat(newline)
-
-patterns.linesplitter = linesplitter
-
-function string.splitlines(str)
- return lpegmatch(linesplitter,str)
-end
-
-local utflinesplitter = utfbom^-1 * tsplitat(newline)
-
-patterns.utflinesplitter = utflinesplitter
-
-function string.utfsplitlines(str)
- return lpegmatch(utflinesplitter,str or "")
-end
-
-local utfcharsplitter_ows = utfbom^-1 * Ct(C(utf8char)^0)
-local utfcharsplitter_iws = utfbom^-1 * Ct((whitespace^1 + C(utf8char))^0)
-
-function string.utfsplit(str,ignorewhitespace) -- new
- if ignorewhitespace then
- return lpegmatch(utfcharsplitter_iws,str or "")
- else
- return lpegmatch(utfcharsplitter_ows,str or "")
- end
-end
-
--- inspect(string.utfsplit("a b c d"))
--- inspect(string.utfsplit("a b c d",true))
-
--- -- alternative 1: 0.77
---
--- local utfcharcounter = utfbom^-1 * Cs((utf8char/'!')^0)
---
--- function string.utflength(str)
--- return #lpegmatch(utfcharcounter,str or "")
--- end
---
--- -- alternative 2: 1.70
---
--- local n = 0
---
--- local utfcharcounter = utfbom^-1 * (utf8char/function() n = n + 1 end)^0 -- slow
---
--- function string.utflength(str)
--- n = 0
--- lpegmatch(utfcharcounter,str or "")
--- return n
--- end
---
--- -- alternative 3: 0.24 (native unicode.utf8.len: 0.047)
-
-local n = 0
-
-local utfcharcounter = utfbom^-1 * Cs ( (
- Cp() * (lpeg.patterns.utf8one )^1 * Cp() / function(f,t) n = n + t - f end
- + Cp() * (lpeg.patterns.utf8two )^1 * Cp() / function(f,t) n = n + (t - f)/2 end
- + Cp() * (lpeg.patterns.utf8three)^1 * Cp() / function(f,t) n = n + (t - f)/3 end
- + Cp() * (lpeg.patterns.utf8four )^1 * Cp() / function(f,t) n = n + (t - f)/4 end
-)^0 )
-
-function string.utflength(str)
- n = 0
- lpegmatch(utfcharcounter,str or "")
- return n
-end
-
-
-local cache = { }
-
-function lpeg.checkedsplit(separator,str)
- local c = cache[separator]
- if not c then
- separator = P(separator)
- local other = C((1 - separator)^1)
- c = Ct(separator^0 * other * (separator^1 * other)^0)
- cache[separator] = c
- end
- return lpegmatch(c,str)
-end
-
-function string.checkedsplit(str,separator)
- local c = cache[separator]
- if not c then
- separator = P(separator)
- local other = C((1 - separator)^1)
- c = Ct(separator^0 * other * (separator^1 * other)^0)
- cache[separator] = c
- end
- return lpegmatch(c,str)
-end
-
-
-local function f2(s) local c1, c2 = byte(s,1,2) return c1 * 64 + c2 - 12416 end
-local function f3(s) local c1, c2, c3 = byte(s,1,3) return (c1 * 64 + c2) * 64 + c3 - 925824 end
-local function f4(s) local c1, c2, c3, c4 = byte(s,1,4) return ((c1 * 64 + c2) * 64 + c3) * 64 + c4 - 63447168 end
-
-local utf8byte = patterns.utf8one/byte + patterns.utf8two/f2 + patterns.utf8three/f3 + patterns.utf8four/f4
-
-patterns.utf8byte = utf8byte
-
-
-
-local cache = { }
-
-function lpeg.stripper(str)
- if type(str) == "string" then
- local s = cache[str]
- if not s then
- s = Cs(((S(str)^1)/"" + 1)^0)
- cache[str] = s
- end
- return s
- else
- return Cs(((str^1)/"" + 1)^0)
- end
-end
-
-local cache = { }
-
-function lpeg.keeper(str)
- if type(str) == "string" then
- local s = cache[str]
- if not s then
- s = Cs((((1-S(str))^1)/"" + 1)^0)
- cache[str] = s
- end
- return s
- else
- return Cs((((1-str)^1)/"" + 1)^0)
- end
-end
-
-function lpeg.frontstripper(str) -- or pattern (yet undocumented)
- return (P(str) + P(true)) * Cs(anything^0)
-end
-
-function lpeg.endstripper(str) -- or pattern (yet undocumented)
- return Cs((1 - P(str) * endofstring)^0)
-end
-
--- Just for fun I looked at the used bytecode and
--- p = (p and p + pp) or pp gets one more (testset).
-
-function lpeg.replacer(one,two,makefunction)
- local pattern
- if type(one) == "table" then
- local no = #one
- local p = P(false)
- if no == 0 then
- for k, v in next, one do
- p = p + P(k) / v
- end
- pattern = Cs((p + 1)^0)
- elseif no == 1 then
- local o = one[1]
- one, two = P(o[1]), o[2]
- -- pattern = Cs(((1-one)^1 + one/two)^0)
- pattern = Cs((one/two + 1)^0)
- else
- for i=1,no do
- local o = one[i]
- p = p + P(o[1]) / o[2]
- end
- pattern = Cs((p + 1)^0)
- end
- else
- one = P(one)
- two = two or ""
- -- pattern = Cs(((1-one)^1 + one/two)^0)
- pattern = Cs((one/two +1)^0)
- end
- if makefunction then
- return function(str)
- return lpegmatch(pattern,str)
- end
- else
- return pattern
- end
-end
-
-function lpeg.finder(lst,makefunction)
- local pattern
- if type(lst) == "table" then
- local p = P(false)
- for i=1,#lst do
- p = p + P(lst[i])
- end
- pattern = (p + 1)^0
- else
- pattern = (P(lst) + 1)^0
- end
- if makefunction then
- return function(str)
- return lpegmatch(pattern,str)
- end
- else
- return pattern
- end
-end
-
--- print(lpeg.match(lpeg.replacer("e","a"),"test test"))
--- print(lpeg.match(lpeg.replacer{{"e","a"}},"test test"))
--- print(lpeg.match(lpeg.replacer({ e = "a", t = "x" }),"test test"))
-
-local splitters_f, splitters_s = { }, { }
-
-function lpeg.firstofsplit(separator) -- always return value
- local splitter = splitters_f[separator]
- if not splitter then
- separator = P(separator)
- splitter = C((1 - separator)^0)
- splitters_f[separator] = splitter
- end
- return splitter
-end
-
-function lpeg.secondofsplit(separator) -- nil if not split
- local splitter = splitters_s[separator]
- if not splitter then
- separator = P(separator)
- splitter = (1 - separator)^0 * separator * C(anything^0)
- splitters_s[separator] = splitter
- end
- return splitter
-end
-
-function lpeg.balancer(left,right)
- left, right = P(left), P(right)
- return P { left * ((1 - left - right) + V(1))^0 * right }
-end
-
-
-
-local nany = utf8char/""
-
-function lpeg.counter(pattern)
- pattern = Cs((P(pattern)/" " + nany)^0)
- return function(str)
- return #lpegmatch(pattern,str)
- end
-end
-
-if utfgmatch then
-
- function lpeg.count(str,what) -- replaces string.count
- if type(what) == "string" then
- local n = 0
- for _ in utfgmatch(str,what) do
- n = n + 1
- end
- return n
- else -- 4 times slower but still faster than / function
- return #lpegmatch(Cs((P(what)/" " + nany)^0),str)
- end
- end
-
-else
-
- local cache = { }
-
- function lpeg.count(str,what) -- replaces string.count
- if type(what) == "string" then
- local p = cache[what]
- if not p then
- p = Cs((P(what)/" " + nany)^0)
- cache[p] = p
- end
- return #lpegmatch(p,str)
- else -- 4 times slower but still faster than / function
- return #lpegmatch(Cs((P(what)/" " + nany)^0),str)
- end
- end
-
-end
-
-local patterns_escapes = { -- also defines in l-string
- ["%"] = "%%",
- ["."] = "%.",
- ["+"] = "%+", ["-"] = "%-", ["*"] = "%*",
- ["["] = "%[", ["]"] = "%]",
- ["("] = "%)", [")"] = "%)",
- -- ["{"] = "%{", ["}"] = "%}"
- -- ["^"] = "%^", ["$"] = "%$",
-}
-
-local simple_escapes = { -- also defines in l-string
- ["-"] = "%-",
- ["."] = "%.",
- ["?"] = ".",
- ["*"] = ".*",
-}
-
-local p = Cs((S("-.+*%()[]") / patterns_escapes + anything)^0)
-local s = Cs((S("-.+*%()[]") / simple_escapes + anything)^0)
-
-function string.escapedpattern(str,simple)
- return lpegmatch(simple and s or p,str)
-end
-
--- utf extensies
-
-lpeg.UP = lpeg.P
-
-if utfcharacters then
-
- function lpeg.US(str)
- local p = P(false)
- for uc in utfcharacters(str) do
- p = p + P(uc)
- end
- return p
- end
-
-
-elseif utfgmatch then
-
- function lpeg.US(str)
- local p = P(false)
- for uc in utfgmatch(str,".") do
- p = p + P(uc)
- end
- return p
- end
-
-else
-
- function lpeg.US(str)
- local p = P(false)
- local f = function(uc)
- p = p + P(uc)
- end
- lpegmatch((utf8char/f)^0,str)
- return p
- end
-
-end
-
-local range = utf8byte * utf8byte + Cc(false) -- utf8byte is already a capture
-
-local utfchar = unicode and unicode.utf8 and unicode.utf8.char
-
-function lpeg.UR(str,more)
- local first, last
- if type(str) == "number" then
- first = str
- last = more or first
- else
- first, last = lpegmatch(range,str)
- if not last then
- return P(str)
- end
- end
- if first == last then
- return P(str)
- elseif utfchar and (last - first < 8) then -- a somewhat arbitrary criterium
- local p = P(false)
- for i=first,last do
- p = p + P(utfchar(i))
- end
- return p -- nil when invalid range
- else
- local f = function(b)
- return b >= first and b <= last
- end
- -- tricky, these nested captures
- return utf8byte / f -- nil when invalid range
- end
-end
-
--- print(lpeg.match(lpeg.Cs((C(lpeg.UR("αω"))/{ ["χ"] = "OEPS" })^0),"αωχαω"))
-
-
-
-function lpeg.is_lpeg(p)
- return p and lpegtype(p) == "pattern"
-end
-
-function lpeg.oneof(list,...) -- lpeg.oneof("elseif","else","if","then") -- assume proper order
- if type(list) ~= "table" then
- list = { list, ... }
- end
- -- table.sort(list) -- longest match first
- local p = P(list[1])
- for l=2,#list do
- p = p + P(list[l])
- end
- return p
-end
-
--- For the moment here, but it might move to utilities. Beware, we need to
--- have the longest keyword first, so 'aaa' comes beforte 'aa' which is why we
--- loop back from the end cq. prepend.
-
-local sort, fastcopy, sortedkeys = table.sort, table.fastcopy, table.sortedkeys -- dependency!
-
-function lpeg.append(list,pp,delayed,checked)
- local p = pp
- if #list > 0 then
- local keys = fastcopy(list)
- sort(keys)
- for i=#keys,1,-1 do
- local k = keys[i]
- if p then
- p = P(k) + p
- else
- p = P(k)
- end
- end
- elseif delayed then -- hm, it looks like the lpeg parser resolves anyway
- local keys = sortedkeys(list)
- if p then
- for i=1,#keys,1 do
- local k = keys[i]
- local v = list[k]
- p = P(k)/list + p
- end
- else
- for i=1,#keys do
- local k = keys[i]
- local v = list[k]
- if p then
- p = P(k) + p
- else
- p = P(k)
- end
- end
- if p then
- p = p / list
- end
- end
- elseif checked then
- -- problem: substitution gives a capture
- local keys = sortedkeys(list)
- for i=1,#keys do
- local k = keys[i]
- local v = list[k]
- if p then
- if k == v then
- p = P(k) + p
- else
- p = P(k)/v + p
- end
- else
- if k == v then
- p = P(k)
- else
- p = P(k)/v
- end
- end
- end
- else
- local keys = sortedkeys(list)
- for i=1,#keys do
- local k = keys[i]
- local v = list[k]
- if p then
- p = P(k)/v + p
- else
- p = P(k)/v
- end
- end
- end
- return p
-end
-
--- inspect(lpeg.append({ a = "1", aa = "1", aaa = "1" } ,nil,true))
--- inspect(lpeg.append({ ["degree celsius"] = "1", celsius = "1", degree = "1" } ,nil,true))
-
--- function lpeg.exact_match(words,case_insensitive)
--- local pattern = concat(words)
--- if case_insensitive then
--- local pattern = S(upper(characters)) + S(lower(characters))
--- local list = { }
--- for i=1,#words do
--- list[lower(words[i])] = true
--- end
--- return Cmt(pattern^1, function(_,i,s)
--- return list[lower(s)] and i
--- end)
--- else
--- local pattern = S(concat(words))
--- local list = { }
--- for i=1,#words do
--- list[words[i]] = true
--- end
--- return Cmt(pattern^1, function(_,i,s)
--- return list[s] and i
--- end)
--- end
--- end
-
--- experiment:
-
-local function make(t)
- local p
--- for k, v in next, t do
- for k, v in table.sortedhash(t) do
- if not p then
- if next(v) then
- p = P(k) * make(v)
- else
- p = P(k)
- end
- else
- if next(v) then
- p = p + P(k) * make(v)
- else
- p = p + P(k)
- end
- end
- end
- return p
-end
-
-function lpeg.utfchartabletopattern(list)
- local tree = { }
- for i=1,#list do
- local t = tree
- for c in gmatch(list[i],".") do
- if not t[c] then
- t[c] = { }
- end
- t = t[c]
- end
- end
- return make(tree)
-end
-
--- inspect ( lpeg.utfchartabletopattern {
--- utfchar(0x00A0), -- nbsp
--- utfchar(0x2000), -- enquad
--- utfchar(0x2001), -- emquad
--- utfchar(0x2002), -- enspace
--- utfchar(0x2003), -- emspace
--- utfchar(0x2004), -- threeperemspace
--- utfchar(0x2005), -- fourperemspace
--- utfchar(0x2006), -- sixperemspace
--- utfchar(0x2007), -- figurespace
--- utfchar(0x2008), -- punctuationspace
--- utfchar(0x2009), -- breakablethinspace
--- utfchar(0x200A), -- hairspace
--- utfchar(0x200B), -- zerowidthspace
--- utfchar(0x202F), -- narrownobreakspace
--- utfchar(0x205F), -- math thinspace
--- } )
-
--- handy from within tex:
-
-local lpegmatch = lpeg.match
-
-local replacer = lpeg.replacer("@","%%") -- Watch the escaped % in lpeg!
-
-function string.tformat(fmt,...)
- return format(lpegmatch(replacer,fmt),...)
-end
-
--- strips leading and trailing spaces and collapsed all other spaces
-
-local pattern = Cs(whitespace^0/"" * ((whitespace^1 * P(-1) / "") + (whitespace^1/" ") + P(1))^0)
-
-function string.collapsespaces(str)
- return lpegmatch(pattern,str)
-end
-
-
-end -- of closure
-
-do -- create closure to overcome 200 locals limit
-
if not modules then modules = { } end modules ['l-io'] = {
version = 1.001,
comment = "companion to luat-lib.mkiv",
@@ -2368,7 +2526,7 @@ function io.readstring(f,n,m)
f:seek("set",n)
n = m
end
- local str = gsub(f:read(n),"%z","")
+ local str = gsub(f:read(n),"\000","")
return str
end
@@ -2418,10 +2576,129 @@ local lpegmatch = lpeg.match
number = number or { }
local number = number
--- a,b,c,d,e,f = number.toset(100101)
+if bit32 then
+
+ local btest, bor = bit32.btest, bit32.bor
+
+ function number.bit(p)
+ return 2 ^ (p - 1) -- 1-based indexing
+ end
+
+ number.hasbit = btest
+ number.setbit = bor
+
+ function number.setbit(x,p)
+ return btest(x,p) and x or x + p
+ end
+
+ function number.clearbit(x,p)
+ return btest(x,p) and x - p or x
+ end
+
+else
+
+ -- http://ricilake.blogspot.com/2007/10/iterating-bits-in-lua.html
+
+ function number.bit(p)
+ return 2 ^ (p - 1) -- 1-based indexing
+ end
+
+ function number.hasbit(x, p) -- typical call: if hasbit(x, bit(3)) then ...
+ return x % (p + p) >= p
+ end
+
+ function number.setbit(x, p)
+ return (x % (p + p) >= p) and x or x + p
+ end
+
+ function number.clearbit(x, p)
+ return (x % (p + p) >= p) and x - p or x
+ end
+
+end
+
+-- print(number.tobitstring(8))
+-- print(number.tobitstring(14))
+-- print(number.tobitstring(66))
+-- print(number.tobitstring(0x00))
+-- print(number.tobitstring(0xFF))
+-- print(number.tobitstring(46260767936,4))
+
+if bit32 then
+
+ local bextract = bit32.extract
+
+ local t = {
+ "0", "0", "0", "0", "0", "0", "0", "0",
+ "0", "0", "0", "0", "0", "0", "0", "0",
+ "0", "0", "0", "0", "0", "0", "0", "0",
+ "0", "0", "0", "0", "0", "0", "0", "0",
+ }
+
+ function number.tobitstring(b,m)
+ -- if really needed we can speed this one up
+ -- because small numbers need less extraction
+ local n = 32
+ for i=0,31 do
+ local v = bextract(b,i)
+ local k = 32 - i
+ if v == 1 then
+ n = k
+ t[k] = "1"
+ else
+ t[k] = "0"
+ end
+ end
+ if m then
+ m = 33 - m * 8
+ if m < 1 then
+ m = 1
+ end
+ return concat(t,"",m)
+ elseif n < 8 then
+ return concat(t)
+ elseif n < 16 then
+ return concat(t,"",9)
+ elseif n < 24 then
+ return concat(t,"",17)
+ else
+ return concat(t,"",25)
+ end
+ end
+
+else
+
+ function number.tobitstring(n,m)
+ if n > 0 then
+ local t = { }
+ while n > 0 do
+ insert(t,1,n % 2 > 0 and 1 or 0)
+ n = floor(n/2)
+ end
+ local nn = 8 - #t % 8
+ if nn > 0 and nn < 8 then
+ for i=1,nn do
+ insert(t,1,0)
+ end
+ end
+ if m then
+ m = m * 8 - #t
+ if m > 0 then
+ insert(t,1,rep("0",m))
+ end
+ end
+ return concat(t)
+ elseif m then
+ rep("00000000",m)
+ else
+ return "00000000"
+ end
+ end
-function number.toset(n)
- return match(tostring(n),"(.?)(.?)(.?)(.?)(.?)(.?)(.?)(.?)")
+end
+
+function number.valid(str,default)
+ return tonumber(str) or default or nil
end
function number.toevenhex(n)
@@ -2433,86 +2710,59 @@ function number.toevenhex(n)
end
end
--- the lpeg way is slower on 8 digits, but faster on 4 digits, some 7.5%
--- on
+-- a,b,c,d,e,f = number.toset(100101)
+--
+-- function number.toset(n)
+-- return match(tostring(n),"(.?)(.?)(.?)(.?)(.?)(.?)(.?)(.?)")
+-- end
+--
+-- -- the lpeg way is slower on 8 digits, but faster on 4 digits, some 7.5%
+-- -- on
--
-- for i=1,1000000 do
-- local a,b,c,d,e,f,g,h = number.toset(12345678)
-- local a,b,c,d = number.toset(1234)
-- local a,b,c = number.toset(123)
+-- local a,b,c = number.toset("123")
-- end
---
--- of course dedicated "(.)(.)(.)(.)" matches are even faster
-local one = lpeg.C(1-lpeg.S(''))^1
+local one = lpeg.C(1-lpeg.S('')/tonumber)^1
function number.toset(n)
return lpegmatch(one,tostring(n))
end
-function number.bits(n,zero)
- local t, i = { }, (zero and 0) or 1
- while n > 0 do
+-- function number.bits(n,zero)
+-- local t, i = { }, (zero and 0) or 1
+-- while n > 0 do
+-- local m = n % 2
+-- if m > 0 then
+-- insert(t,1,i)
+-- end
+-- n = floor(n/2)
+-- i = i + 1
+-- end
+-- return t
+-- end
+--
+-- -- a bit faster
+
+local function bits(n,i,...)
+ if n > 0 then
local m = n % 2
+ local n = floor(n/2)
if m > 0 then
- insert(t,1,i)
- end
- n = floor(n/2)
- i = i + 1
- end
- return t
-end
-
-
-function number.bit(p)
- return 2 ^ (p - 1) -- 1-based indexing
-end
-
-function number.hasbit(x, p) -- typical call: if hasbit(x, bit(3)) then ...
- return x % (p + p) >= p
-end
-
-function number.setbit(x, p)
- return (x % (p + p) >= p) and x or x + p
-end
-
-function number.clearbit(x, p)
- return (x % (p + p) >= p) and x - p or x
-end
-
-
-function number.tobitstring(n,m)
- if n == 0 then
- if m then
- rep("00000000",m)
+ return bits(n, i+1, i, ...)
else
- return "00000000"
+ return bits(n, i+1, ...)
end
else
- local t = { }
- while n > 0 do
- insert(t,1,n % 2 > 0 and 1 or 0)
- n = floor(n/2)
- end
- local nn = 8 - #t % 8
- if nn > 0 and nn < 8 then
- for i=1,nn do
- insert(t,1,0)
- end
- end
- if m then
- m = m * 8 - #t
- if m > 0 then
- insert(t,1,rep("0",m))
- end
- end
- return concat(t)
+ return ...
end
end
-
-function number.valid(str,default)
- return tonumber(str) or default or nil
+function number.bits(n)
+ return { bits(n,1) }
end
@@ -3104,25 +3354,25 @@ local suffix = period/"" * (1-period-slashes)^1 * -1
local pattern = C((noslashes^0 * slashes^1)^1)
local function pathpart(name,default)
- return lpegmatch(pattern,name) or default or ""
+ return name and lpegmatch(pattern,name) or default or ""
end
local pattern = (noslashes^0 * slashes)^1 * C(noslashes^1) * -1
local function basename(name)
- return lpegmatch(pattern,name) or name
+ return name and lpegmatch(pattern,name) or name
end
local pattern = (noslashes^0 * slashes^1)^0 * Cs((1-suffix)^1) * suffix^0
local function nameonly(name)
- return lpegmatch(pattern,name) or name
+ return name and lpegmatch(pattern,name) or name
end
local pattern = (noslashes^0 * slashes)^0 * (noperiod^1 * period)^1 * C(noperiod^1) * -1
local function suffixonly(name)
- return lpegmatch(pattern,name) or ""
+ return name and lpegmatch(pattern,name) or ""
end
file.pathpart = pathpart
@@ -3153,7 +3403,9 @@ local pattern_c = C(drive * path) * C(base * suffix) -- trick: two extra capture
local pattern_d = path * rest
function file.splitname(str,splitdrive)
- if splitdrive then
+ if not str then
+ -- error
+ elseif splitdrive then
return lpegmatch(pattern_a,str) -- returns drive, path, base, suffix
else
return lpegmatch(pattern_b,str) -- returns path, base, suffix
@@ -3161,34 +3413,36 @@ function file.splitname(str,splitdrive)
end
function file.splitbase(str)
- return lpegmatch(pattern_d,str) -- returns path, base+suffix
+ return str and lpegmatch(pattern_d,str) -- returns path, base+suffix
end
function file.nametotable(str,splitdrive) -- returns table
- local path, drive, subpath, name, base, suffix = lpegmatch(pattern_c,str)
- if splitdrive then
- return {
- path = path,
- drive = drive,
- subpath = subpath,
- name = name,
- base = base,
- suffix = suffix,
- }
- else
- return {
- path = path,
- name = name,
- base = base,
- suffix = suffix,
- }
+ if str then
+ local path, drive, subpath, name, base, suffix = lpegmatch(pattern_c,str)
+ if splitdrive then
+ return {
+ path = path,
+ drive = drive,
+ subpath = subpath,
+ name = name,
+ base = base,
+ suffix = suffix,
+ }
+ else
+ return {
+ path = path,
+ name = name,
+ base = base,
+ suffix = suffix,
+ }
+ end
end
end
local pattern = Cs(((period * noperiod^1 * -1)/"" + 1)^1)
function file.removesuffix(name)
- return lpegmatch(pattern,name)
+ return name and lpegmatch(pattern,name)
end
-- local pattern = (noslashes^0 * slashes)^0 * (noperiod^1 * period)^1 * Cp() * noperiod^1 * -1
@@ -3205,8 +3459,8 @@ end
local suffix = period/"" * (1-period-slashes)^1 * -1
local pattern = Cs((noslashes^0 * slashes^1)^0 * ((1-suffix)^1)) * Cs(suffix)
-function file.addsuffix(filename, suffix, criterium)
- if not suffix or suffix == "" then
+function file.addsuffix(filename,suffix,criterium)
+ if not filename or not suffix or suffix == "" then
return filename
elseif criterium == true then
return filename .. "." .. suffix
@@ -3252,7 +3506,7 @@ local suffix = period * (1-period-slashes)^1 * -1
local pattern = Cs((1-suffix)^0)
function file.replacesuffix(name,suffix)
- if suffix and suffix ~= "" then
+ if name and suffix and suffix ~= "" then
return lpegmatch(pattern,name) .. "." .. suffix
else
return name
@@ -3261,10 +3515,10 @@ end
--
-local reslasher = lpeg.replacer(S("\\"),"/")
+local reslasher = lpeg.replacer(P("\\"),"/")
function file.reslash(str)
- return lpegmatch(reslasher,str)
+ return str and lpegmatch(reslasher,str)
end
-- We should be able to use:
@@ -3280,7 +3534,9 @@ end
-- variant:
function file.is_writable(name)
- if lfs.isdir(name) then
+ if not name then
+ -- error
+ elseif lfs.isdir(name) then
name = name .. "/m_t_x_t_e_s_t.tmp"
local f = io.open(name,"wb")
if f then
@@ -3308,24 +3564,32 @@ end
local readable = P("r") * Cc(true)
function file.is_readable(name)
- local a = attributes(name)
- return a and lpegmatch(readable,a.permissions) or false
+ if name then
+ local a = attributes(name)
+ return a and lpegmatch(readable,a.permissions) or false
+ else
+ return false
+ end
end
file.isreadable = file.is_readable -- depricated
file.iswritable = file.is_writable -- depricated
function file.size(name)
- local a = attributes(name)
- return a and a.size or 0
+ if name then
+ local a = attributes(name)
+ return a and a.size or 0
+ else
+ return 0
+ end
end
function file.splitpath(str,separator) -- string .. reslash is a bonus (we could do a direct split)
- return checkedsplit(lpegmatch(reslasher,str),separator or io.pathseparator)
+ return str and checkedsplit(lpegmatch(reslasher,str),separator or io.pathseparator)
end
function file.joinpath(tab,separator) -- table
- return concat(tab,separator or io.pathseparator) -- can have trailing //
+ return tab and concat(tab,separator or io.pathseparator) -- can have trailing //
end
local stripper = Cs(P(fwslash)^0/"" * reslasher)
@@ -3333,14 +3597,23 @@ local isnetwork = fwslash * fwslash * (1-fwslash) + (1-fwslash-colon)^1 * colon
local isroot = fwslash^1 * -1
local hasroot = fwslash^1
-function file.join(...) -- rather dirty
+local deslasher = lpeg.replacer(S("\\/")^1,"/")
+
+-- If we have a network or prefix then there is a change that we end up with two
+-- // in the middle ... we could prevent this if we (1) expand prefixes: and (2)
+-- split and rebuild as url. Of course we could assume no network paths (which
+-- makes sense) adn assume either mapped drives (windows) or mounts (unix) but
+-- then we still have to deal with urls ... anyhow, multiple // are never a real
+-- problem but just ugly.
+
+function file.join(...)
local lst = { ... }
local one = lst[1]
if lpegmatch(isnetwork,one) then
- local two = lpegmatch(reslasher,concat(lst,"/",2))
+ local two = lpegmatch(deslasher,concat(lst,"/",2))
return one .. "/" .. two
elseif lpegmatch(isroot,one) then
- local two = lpegmatch(reslasher,concat(lst,"/",2))
+ local two = lpegmatch(deslasher,concat(lst,"/",2))
if lpegmatch(hasroot,two) then
return two
else
@@ -3349,7 +3622,7 @@ function file.join(...) -- rather dirty
elseif one == "" then
return lpegmatch(stripper,concat(lst,"/",2))
else
- return lpegmatch(reslasher,concat(lst,"/"))
+ return lpegmatch(deslasher,concat(lst,"/"))
end
end
@@ -3378,6 +3651,9 @@ local splitstarter = (Cs(drivespec * (bwslash/"/" + fwslash)^0) + Cc(false)) * C
local absolute = fwslash
function file.collapsepath(str,anchor)
+ if not str then
+ return
+ end
if anchor and not lpegmatch(anchors,str) then
str = getcurrentdir() .. "/" .. str
end
@@ -3387,7 +3663,6 @@ function file.collapsepath(str,anchor)
return lpegmatch(reslasher,str)
end
local starter, oldelements = lpegmatch(splitstarter,str)
--- inspect(oldelements)
local newelements = { }
local i = #oldelements
while i > 0 do
@@ -3441,11 +3716,13 @@ local whatever = P("-")^0 / ""
local pattern_b = Cs(whatever * (1 - whatever * -1)^1)
function file.robustname(str,strict)
- str = lpegmatch(pattern_a,str) or str
- if strict then
- return lpegmatch(pattern_b,str) or str -- two step is cleaner (less backtracking)
- else
- return str
+ if str then
+ str = lpegmatch(pattern_a,str) or str
+ if strict then
+ return lpegmatch(pattern_b,str) or str -- two step is cleaner (less backtracking)
+ else
+ return str
+ end
end
end
@@ -3453,7 +3730,9 @@ file.readdata = io.loaddata
file.savedata = io.savedata
function file.copy(oldname,newname)
- file.savedata(newname,io.loaddata(oldname))
+ if oldname and newname then
+ file.savedata(newname,io.loaddata(oldname))
+ end
end
-- also rewrite previous
@@ -3474,11 +3753,11 @@ lpeg.patterns.rootbased = rootbased
-- ./name ../name /name c: :// name/name
function file.is_qualified_path(filename)
- return lpegmatch(qualified,filename) ~= nil
+ return filename and lpegmatch(qualified,filename) ~= nil
end
function file.is_rootbased_path(filename)
- return lpegmatch(rootbased,filename) ~= nil
+ return filename and lpegmatch(rootbased,filename) ~= nil
end
-- function test(t) for k, v in next, t do print(v, "=>", file.splitname(v)) end end
@@ -3500,8 +3779,10 @@ end
-- for myself:
function file.strip(name,dir)
- local b, a = match(name,"^(.-)" .. dir .. "(.*)$")
- return a ~= "" and a or name
+ if name then
+ local b, a = match(name,"^(.-)" .. dir .. "(.*)$")
+ return a ~= "" and a or name
+ end
end
-- local debuglist = {
@@ -3943,7 +4224,7 @@ if not modules then modules = { } end modules ['l-dir'] = {
-- dir.expandname will be merged with cleanpath and collapsepath
-local type = type
+local type, select = type, select
local find, gmatch, match, gsub = string.find, string.gmatch, string.match, string.gsub
local concat, insert, remove = table.concat, table.insert, table.remove
local lpegmatch = lpeg.match
@@ -4165,15 +4446,15 @@ local onwindows = os.type == "windows" or find(os.getenv("PATH"),";")
if onwindows then
function dir.mkdirs(...)
- local str, pth, t = "", "", { ... }
- for i=1,#t do
- local s = t[i]
- if s ~= "" then
- if str ~= "" then
- str = str .. "/" .. s
- else
- str = s
- end
+ local str, pth = "", ""
+ for i=1,select("#",...) do
+ local s = select(i,...)
+ if s == "" then
+ -- skip
+ elseif str == "" then
+ str = s
+ else
+ str = str .. "/" .. s
end
end
local first, middle, last
@@ -4222,9 +4503,9 @@ if onwindows then
else
function dir.mkdirs(...)
- local str, pth, t = "", "", { ... }
- for i=1,#t do
- local s = t[i]
+ local str, pth = "", ""
+ for i=1,select("#",...) do
+ local s = select(i,...)
if s and s ~= "" then -- we catch nil and false
if str ~= "" then
str = str .. "/" .. s
@@ -4424,29 +4705,45 @@ if not modules then modules = { } end modules ['l-unicode'] = {
-- todo: utf.sub replacement (used in syst-aux)
-local concat = table.concat
+-- we put these in the utf namespace:
+
+utf = utf or (unicode and unicode.utf8) or { }
+
+utf.characters = utf.characters or string.utfcharacters
+utf.values = utf.values or string.utfvalues
+
+-- string.utfvalues
+-- string.utfcharacters
+-- string.characters
+-- string.characterpairs
+-- string.bytes
+-- string.bytepairs
+
local type = type
-local P, C, R, Cs, Ct, Cmt = lpeg.P, lpeg.C, lpeg.R, lpeg.Cs, lpeg.Ct, lpeg.Cmt
+local char, byte, format, sub = string.char, string.byte, string.format, string.sub
+local concat = table.concat
+local P, C, R, Cs, Ct, Cmt, Cc, Carg = lpeg.P, lpeg.C, lpeg.R, lpeg.Cs, lpeg.Ct, lpeg.Cmt, lpeg.Cc, lpeg.Carg
local lpegmatch, patterns = lpeg.match, lpeg.patterns
-local utftype = patterns.utftype
-local char, byte, find, bytepairs, utfvalues, format, sub = string.char, string.byte, string.find, string.bytepairs, string.utfvalues, string.format, string.sub
-local utfsplitlines = string.utfsplitlines
-if not unicode then
+local bytepairs = string.bytepairs
- unicode = { }
+local finder = lpeg.finder
+local replacer = lpeg.replacer
-end
+local utfvalues = utf.values
+local utfgmatch = utf.gmatch -- not always present
-local unicode = unicode
+local p_utftype = patterns.utftype
+local p_utfoffset = patterns.utfoffset
+local p_utf8char = patterns.utf8char
+local p_utf8byte = patterns.utf8byte
+local p_utfbom = patterns.utfbom
+local p_newline = patterns.newline
+local p_whitespace = patterns.whitespace
-utf = utf or unicode.utf8
-
-if not utf then
+if not unicode then
- utf8 = { }
- unicode.utf8 = utf8
- utf = utf8
+ unicode = { utf = utf } -- for a while
end
@@ -4503,64 +4800,13 @@ if not utf.byte then
end
-if not utf.sub then
-
- local utf8char = patterns.utf8char
-
- -- inefficient as lpeg just copies ^n
-
- -- local function sub(str,start,stop)
- -- local pattern = utf8char^-(start-1) * C(utf8char^-(stop-start+1))
- -- inspect(pattern)
- -- return lpegmatch(pattern,str) or ""
- -- end
-
- local b, e, n, first, last = 0, 0, 0, 0, 0
-
- local function slide(s,p)
- n = n + 1
- if n == first then
- b = p
- if not last then
- return nil
- end
- end
- if n == last then
- e = p
- return nil
- else
- return p
- end
- end
-
- local pattern = Cmt(utf8char,slide)^0
-
- function utf.sub(str,start,stop) -- todo: from the end
- if not start then
- return str
- end
- b, e, n, first, last = 0, 0, 0, start, stop
- lpegmatch(pattern,str)
- if not stop then
- return sub(str,b)
- else
- return sub(str,b,e)
- end
- end
-
- -- print(utf.sub("Hans Hagen is my name"))
- -- print(utf.sub("Hans Hagen is my name",5))
- -- print(utf.sub("Hans Hagen is my name",5,10))
-
-end
-
local utfchar, utfbyte = utf.char, utf.byte
-- As we want to get rid of the (unmaintained) utf library we implement our own
-- variants (in due time an independent module):
-function unicode.filetype(data)
- return data and lpegmatch(utftype,data) or "unknown"
+function utf.filetype(data)
+ return data and lpegmatch(p_utftype,data) or "unknown"
end
local toentities = Cs (
@@ -4647,7 +4893,7 @@ local pattern = P("\254\255") * Cs( (
+ one
)^1 )
-function string.toutf(s)
+function string.toutf(s) -- in string namespace
return lpegmatch(pattern,s) or s -- todo: utf32
end
@@ -4663,26 +4909,269 @@ local validatedutf = Cs (
patterns.validatedutf = validatedutf
-function string.validutf(str)
- return lpegmatch(validatedutf,str)
+function utf.is_valid(str)
+ return type(str) == "string" and lpegmatch(validatedutf,str) or false
end
+if not utf.len then
-utf.length = string.utflength
-utf.split = string.utfsplit
-utf.splitines = string.utfsplitlines
-utf.valid = string.validutf
+ -- -- alternative 1: 0.77
+ --
+ -- local utfcharcounter = utfbom^-1 * Cs((p_utf8char/'!')^0)
+ --
+ -- function utf.len(str)
+ -- return #lpegmatch(utfcharcounter,str or "")
+ -- end
+ --
+ -- -- alternative 2: 1.70
+ --
+ -- local n = 0
+ --
+ -- local utfcharcounter = utfbom^-1 * (p_utf8char/function() n = n + 1 end)^0 -- slow
+ --
+ -- function utf.length(str)
+ -- n = 0
+ -- lpegmatch(utfcharcounter,str or "")
+ -- return n
+ -- end
+ --
+ -- -- alternative 3: 0.24 (native unicode.utf8.len: 0.047)
+
+ -- local n = 0
+ --
+ -- -- local utfcharcounter = lpeg.patterns.utfbom^-1 * P ( ( Cp() * (
+ -- -- patterns.utf8one ^1 * Cc(1)
+ -- -- + patterns.utf8two ^1 * Cc(2)
+ -- -- + patterns.utf8three^1 * Cc(3)
+ -- -- + patterns.utf8four ^1 * Cc(4) ) * Cp() / function(f,d,t) n = n + (t - f)/d end
+ -- -- )^0 ) -- just as many captures as below
+ --
+ -- -- local utfcharcounter = lpeg.patterns.utfbom^-1 * P ( (
+ -- -- (Cmt(patterns.utf8one ^1,function(_,_,s) n = n + #s return true end))
+ -- -- + (Cmt(patterns.utf8two ^1,function(_,_,s) n = n + #s/2 return true end))
+ -- -- + (Cmt(patterns.utf8three^1,function(_,_,s) n = n + #s/3 return true end))
+ -- -- + (Cmt(patterns.utf8four ^1,function(_,_,s) n = n + #s/4 return true end))
+ -- -- )^0 ) -- not interesting as it creates strings but sometimes faster
+ --
+ -- -- The best so far:
+ --
+ -- local utfcharcounter = utfbom^-1 * P ( (
+ -- Cp() * (patterns.utf8one )^1 * Cp() / function(f,t) n = n + t - f end
+ -- + Cp() * (patterns.utf8two )^1 * Cp() / function(f,t) n = n + (t - f)/2 end
+ -- + Cp() * (patterns.utf8three)^1 * Cp() / function(f,t) n = n + (t - f)/3 end
+ -- + Cp() * (patterns.utf8four )^1 * Cp() / function(f,t) n = n + (t - f)/4 end
+ -- )^0 )
+
+ -- function utf.len(str)
+ -- n = 0
+ -- lpegmatch(utfcharcounter,str or "")
+ -- return n
+ -- end
+
+ local n, f = 0, 1
+
+ local utfcharcounter = patterns.utfbom^-1 * Cmt (
+ Cc(1) * patterns.utf8one ^1
+ + Cc(2) * patterns.utf8two ^1
+ + Cc(3) * patterns.utf8three^1
+ + Cc(4) * patterns.utf8four ^1,
+ function(_,t,d) -- due to Cc no string captures, so faster
+ n = n + (t - f)/d
+ f = t
+ return true
+ end
+ )^0
+
+ function utf.len(str)
+ n, f = 0, 1
+ lpegmatch(utfcharcounter,str or "")
+ return n
+ end
-if not utf.len then
- utf.len = utf.length
end
--- a replacement for simple gsubs:
+utf.length = utf.len
+
+if not utf.sub then
+
+ -- inefficient as lpeg just copies ^n
+
+ -- local function sub(str,start,stop)
+ -- local pattern = p_utf8char^-(start-1) * C(p_utf8char^-(stop-start+1))
+ -- inspect(pattern)
+ -- return lpegmatch(pattern,str) or ""
+ -- end
+
+ -- local b, e, n, first, last = 0, 0, 0, 0, 0
+ --
+ -- local function slide(s,p)
+ -- n = n + 1
+ -- if n == first then
+ -- b = p
+ -- if not last then
+ -- return nil
+ -- end
+ -- end
+ -- if n == last then
+ -- e = p
+ -- return nil
+ -- else
+ -- return p
+ -- end
+ -- end
+ --
+ -- local pattern = Cmt(p_utf8char,slide)^0
+ --
+ -- function utf.sub(str,start,stop) -- todo: from the end
+ -- if not start then
+ -- return str
+ -- end
+ -- b, e, n, first, last = 0, 0, 0, start, stop
+ -- lpegmatch(pattern,str)
+ -- if not stop then
+ -- return sub(str,b)
+ -- else
+ -- return sub(str,b,e-1)
+ -- end
+ -- end
+
+ -- print(utf.sub("Hans Hagen is my name"))
+ -- print(utf.sub("Hans Hagen is my name",5))
+ -- print(utf.sub("Hans Hagen is my name",5,10))
+
+ local utflength = utf.length
+
+ -- also negative indices, upto 10 times slower than a c variant
+
+ local b, e, n, first, last = 0, 0, 0, 0, 0
+
+ local function slide_zero(s,p)
+ n = n + 1
+ if n >= last then
+ e = p - 1
+ else
+ return p
+ end
+ end
+
+ local function slide_one(s,p)
+ n = n + 1
+ if n == first then
+ b = p
+ end
+ if n >= last then
+ e = p - 1
+ else
+ return p
+ end
+ end
+
+ local function slide_two(s,p)
+ n = n + 1
+ if n == first then
+ b = p
+ else
+ return true
+ end
+ end
+
+ local pattern_zero = Cmt(p_utf8char,slide_zero)^0
+ local pattern_one = Cmt(p_utf8char,slide_one )^0
+ local pattern_two = Cmt(p_utf8char,slide_two )^0
+
+ function utf.sub(str,start,stop)
+ if not start then
+ return str
+ end
+ if start == 0 then
+ start = 1
+ end
+ if not stop then
+ if start < 0 then
+ local l = utflength(str) -- we can inline this function if needed
+ start = l + start
+ else
+ start = start - 1
+ end
+ b, n, first = 0, 0, start
+ lpegmatch(pattern_two,str)
+ if n >= first then
+ return sub(str,b)
+ else
+ return ""
+ end
+ end
+ if start < 0 or stop < 0 then
+ local l = utf.length(str)
+ if start < 0 then
+ start = l + start
+ if start <= 0 then
+ start = 1
+ else
+ start = start + 1
+ end
+ end
+ if stop < 0 then
+ stop = l + stop
+ if stop == 0 then
+ stop = 1
+ else
+ stop = stop + 1
+ end
+ end
+ end
+ if start > stop then
+ return ""
+ elseif start > 1 then
+ b, e, n, first, last = 0, 0, 0, start - 1, stop
+ lpegmatch(pattern_one,str)
+ if n >= first and e == 0 then
+ e = #str
+ end
+ return sub(str,b,e)
+ else
+ b, e, n, last = 1, 0, 0, stop
+ lpegmatch(pattern_zero,str)
+ if e == 0 then
+ e = #str
+ end
+ return sub(str,b,e)
+ end
+ end
-local utf8char = patterns.utf8char
+ -- local n = 100000
+ -- local str = string.rep("123456àáâãäå",100)
+ --
+ -- for i=-15,15,1 do
+ -- for j=-15,15,1 do
+ -- if utf.xsub(str,i,j) ~= utf.sub(str,i,j) then
+ -- print("error",i,j,"l>"..utf.xsub(str,i,j),"s>"..utf.sub(str,i,j))
+ -- end
+ -- end
+ -- if utf.xsub(str,i) ~= utf.sub(str,i) then
+ -- print("error",i,"l>"..utf.xsub(str,i),"s>"..utf.sub(str,i))
+ -- end
+ -- end
+
+ -- print(" 1, 7",utf.xsub(str, 1, 7),utf.sub(str, 1, 7))
+ -- print(" 0, 7",utf.xsub(str, 0, 7),utf.sub(str, 0, 7))
+ -- print(" 0, 9",utf.xsub(str, 0, 9),utf.sub(str, 0, 9))
+ -- print(" 4 ",utf.xsub(str, 4 ),utf.sub(str, 4 ))
+ -- print(" 0 ",utf.xsub(str, 0 ),utf.sub(str, 0 ))
+ -- print(" 0, 0",utf.xsub(str, 0, 0),utf.sub(str, 0, 0))
+ -- print(" 4, 4",utf.xsub(str, 4, 4),utf.sub(str, 4, 4))
+ -- print(" 4, 0",utf.xsub(str, 4, 0),utf.sub(str, 4, 0))
+ -- print("-3, 0",utf.xsub(str,-3, 0),utf.sub(str,-3, 0))
+ -- print(" 0,-3",utf.xsub(str, 0,-3),utf.sub(str, 0,-3))
+ -- print(" 5,-3",utf.xsub(str,-5,-3),utf.sub(str,-5,-3))
+ -- print("-3 ",utf.xsub(str,-3 ),utf.sub(str,-3 ))
+
+end
+
+-- a replacement for simple gsubs:
function utf.remapper(mapping)
- local pattern = Cs((utf8char/mapping)^0)
+ local pattern = Cs((p_utf8char/mapping)^0)
return function(str)
if not str or str == "" then
return ""
@@ -4695,55 +5184,113 @@ end
-- local remap = utf.remapper { a = 'd', b = "c", c = "b", d = "a" }
-- print(remap("abcd 1234 abcd"))
+--
+
+function utf.replacer(t) -- no precheck, always string builder
+ local r = replacer(t,false,false,true)
+ return function(str)
+ return lpegmatch(r,str)
+ end
+end
+
+function utf.subtituter(t) -- with precheck and no building if no match
+ local f = finder (t)
+ local r = replacer(t,false,false,true)
+ return function(str)
+ local i = lpegmatch(f,str)
+ if not i then
+ return str
+ elseif i > #str then
+ return str
+ else
+ -- return sub(str,1,i-2) .. lpegmatch(r,str,i-1) -- slower
+ return lpegmatch(r,str)
+ end
+ end
+end
+
+-- inspect(utf.split("a b c d"))
+-- inspect(utf.split("a b c d",true))
+
+local utflinesplitter = p_utfbom^-1 * lpeg.tsplitat(p_newline)
+local utfcharsplitter_ows = p_utfbom^-1 * Ct(C(p_utf8char)^0)
+local utfcharsplitter_iws = p_utfbom^-1 * Ct((p_whitespace^1 + C(p_utf8char))^0)
+local utfcharsplitter_raw = Ct(C(p_utf8char)^0)
+
+patterns.utflinesplitter = utflinesplitter
+
+function utf.splitlines(str)
+ return lpegmatch(utflinesplitter,str or "")
+end
+
+function utf.split(str,ignorewhitespace) -- new
+ if ignorewhitespace then
+ return lpegmatch(utfcharsplitter_iws,str or "")
+ else
+ return lpegmatch(utfcharsplitter_ows,str or "")
+ end
+end
+
+function utf.totable(str) -- keeps bom
+ return lpegmatch(utfcharsplitter_raw,str)
+end
+
-- 0 EF BB BF UTF-8
-- 1 FF FE UTF-16-little-endian
-- 2 FE FF UTF-16-big-endian
-- 3 FF FE 00 00 UTF-32-little-endian
-- 4 00 00 FE FF UTF-32-big-endian
-
-unicode.utfname = {
- [0] = 'utf-8',
- [1] = 'utf-16-le',
- [2] = 'utf-16-be',
- [3] = 'utf-32-le',
- [4] = 'utf-32-be'
-}
-
+--
-- \000 fails in <= 5.0 but is valid in >=5.1 where %z is depricated
-function unicode.utftype(f)
- local str = f:read(4)
- if not str then
- f:seek('set')
- return 0
- -- elseif find(str,"^%z%z\254\255") then -- depricated
- -- elseif find(str,"^\000\000\254\255") then -- not permitted and bugged
- elseif find(str,"\000\000\254\255",1,true) then -- seems to work okay (TH)
- return 4
- -- elseif find(str,"^\255\254%z%z") then -- depricated
- -- elseif find(str,"^\255\254\000\000") then -- not permitted and bugged
- elseif find(str,"\255\254\000\000",1,true) then -- seems to work okay (TH)
- return 3
- elseif find(str,"^\254\255") then
- f:seek('set',2)
- return 2
- elseif find(str,"^\255\254") then
- f:seek('set',2)
- return 1
- elseif find(str,"^\239\187\191") then
- f:seek('set',3)
- return 0
- else
- f:seek('set')
- return 0
+-- utf.name = {
+-- [0] = 'utf-8',
+-- [1] = 'utf-16-le',
+-- [2] = 'utf-16-be',
+-- [3] = 'utf-32-le',
+-- [4] = 'utf-32-be'
+-- }
+--
+-- function utf.magic(f)
+-- local str = f:read(4)
+-- if not str then
+-- f:seek('set')
+-- return 0
+-- -- elseif find(str,"^%z%z\254\255") then -- depricated
+-- -- elseif find(str,"^\000\000\254\255") then -- not permitted and bugged
+-- elseif find(str,"\000\000\254\255",1,true) then -- seems to work okay (TH)
+-- return 4
+-- -- elseif find(str,"^\255\254%z%z") then -- depricated
+-- -- elseif find(str,"^\255\254\000\000") then -- not permitted and bugged
+-- elseif find(str,"\255\254\000\000",1,true) then -- seems to work okay (TH)
+-- return 3
+-- elseif find(str,"^\254\255") then
+-- f:seek('set',2)
+-- return 2
+-- elseif find(str,"^\255\254") then
+-- f:seek('set',2)
+-- return 1
+-- elseif find(str,"^\239\187\191") then
+-- f:seek('set',3)
+-- return 0
+-- else
+-- f:seek('set')
+-- return 0
+-- end
+-- end
+
+function utf.magic(f) -- not used
+ local str = f:read(4) or ""
+ local off = lpegmatch(p_utfoffset,str)
+ if off < 4 then
+ f:seek('set',off)
end
+ return lpegmatch(p_utftype,str)
end
-
-
local function utf16_to_utf8_be(t)
if type(t) == "string" then
- t = utfsplitlines(str)
+ t = lpegmatch(utflinesplitter,t)
end
local result = { } -- we reuse result
for i=1,#t do
@@ -4771,7 +5318,7 @@ end
local function utf16_to_utf8_le(t)
if type(t) == "string" then
- t = utfsplitlines(str)
+ t = lpegmatch(utflinesplitter,t)
end
local result = { } -- we reuse result
for i=1,#t do
@@ -4799,7 +5346,7 @@ end
local function utf32_to_utf8_be(t)
if type(t) == "string" then
- t = utfsplitlines(t)
+ t = lpegmatch(utflinesplitter,t)
end
local result = { } -- we reuse result
for i=1,#t do
@@ -4824,7 +5371,7 @@ end
local function utf32_to_utf8_le(t)
if type(t) == "string" then
- t = utfsplitlines(t)
+ t = lpegmatch(utflinesplitter,t)
end
local result = { } -- we reuse result
for i=1,#t do
@@ -4847,20 +5394,20 @@ local function utf32_to_utf8_le(t)
return t
end
-unicode.utf32_to_utf8_be = utf32_to_utf8_be
-unicode.utf32_to_utf8_le = utf32_to_utf8_le
-unicode.utf16_to_utf8_be = utf16_to_utf8_be
-unicode.utf16_to_utf8_le = utf16_to_utf8_le
+utf.utf32_to_utf8_be = utf32_to_utf8_be
+utf.utf32_to_utf8_le = utf32_to_utf8_le
+utf.utf16_to_utf8_be = utf16_to_utf8_be
+utf.utf16_to_utf8_le = utf16_to_utf8_le
-function unicode.utf8_to_utf8(t)
- return type(t) == "string" and utfsplitlines(t) or t
+function utf.utf8_to_utf8(t)
+ return type(t) == "string" and lpegmatch(utflinesplitter,t) or t
end
-function unicode.utf16_to_utf8(t,endian)
+function utf.utf16_to_utf8(t,endian)
return endian and utf16_to_utf8_be(t) or utf16_to_utf8_le(t) or t
end
-function unicode.utf32_to_utf8(t,endian)
+function utf.utf32_to_utf8(t,endian)
return endian and utf32_to_utf8_be(t) or utf32_to_utf8_le(t) or t
end
@@ -4886,7 +5433,7 @@ local function big(c)
end
end
--- function unicode.utf8_to_utf16(str,littleendian)
+-- function utf.utf8_to_utf16(str,littleendian)
-- if littleendian then
-- return char(255,254) .. utfgsub(str,".",little)
-- else
@@ -4897,7 +5444,7 @@ end
local _, l_remap = utf.remapper(little)
local _, b_remap = utf.remapper(big)
-function unicode.utf8_to_utf16(str,littleendian)
+function utf.utf8_to_utf16(str,littleendian)
if littleendian then
return char(255,254) .. lpegmatch(l_remap,str)
else
@@ -4905,31 +5452,71 @@ function unicode.utf8_to_utf16(str,littleendian)
end
end
-function unicode.utfcodes(str)
- local t, n = { }, 0
- for u in utfvalues(str) do
- n = n + 1
- t[n] = format("0x%04X",u)
- end
- return concat(t,separator or " ")
+-- function utf.tocodes(str,separator) -- can be sped up with an lpeg
+-- local t, n = { }, 0
+-- for u in utfvalues(str) do
+-- n = n + 1
+-- t[n] = format("0x%04X",u)
+-- end
+-- return concat(t,separator or " ")
+-- end
+
+local pattern = Cs (
+ (p_utf8byte / function(unicode ) return format( "0x%04X", unicode) end) *
+ (p_utf8byte * Carg(1) / function(unicode,separator) return format("%s0x%04X",separator,unicode) end)^0
+)
+
+function utf.tocodes(str,separator)
+ return lpegmatch(pattern,str,1,separator or " ")
end
-function unicode.ustring(s)
+function utf.ustring(s)
return format("U+%05X",type(s) == "number" and s or utfbyte(s))
end
-function unicode.xstring(s)
+function utf.xstring(s)
return format("0x%05X",type(s) == "number" and s or utfbyte(s))
end
--
-local pattern = Ct(C(patterns.utf8char)^0)
+local p_nany = p_utf8char / ""
+
+if utfgmatch then
+
+ function utf.count(str,what)
+ if type(what) == "string" then
+ local n = 0
+ for _ in utfgmatch(str,what) do
+ n = n + 1
+ end
+ return n
+ else -- 4 times slower but still faster than / function
+ return #lpegmatch(Cs((P(what)/" " + p_nany)^0),str)
+ end
+ end
+
+else
+
+ local cache = { }
+
+ function utf.count(str,what)
+ if type(what) == "string" then
+ local p = cache[what]
+ if not p then
+ p = Cs((P(what)/" " + p_nany)^0)
+ cache[p] = p
+ end
+ return #lpegmatch(p,str)
+ else -- 4 times slower but still faster than / function
+ return #lpegmatch(Cs((P(what)/" " + p_nany)^0),str)
+ end
+ end
-function utf.totable(str)
- return lpegmatch(pattern,str)
end
+-- maybe also register as string.utf*
+
end -- of closure
@@ -4990,24 +5577,10 @@ local tables = utilities.tables
local format, gmatch, rep, gsub = string.format, string.gmatch, string.rep, string.gsub
local concat, insert, remove = table.concat, table.insert, table.remove
local setmetatable, getmetatable, tonumber, tostring = setmetatable, getmetatable, tonumber, tostring
-local type, next, rawset, tonumber, loadstring = type, next, rawset, tonumber, loadstring
+local type, next, rawset, tonumber, load, select = type, next, rawset, tonumber, load, select
local lpegmatch, P, Cs = lpeg.match, lpeg.P, lpeg.Cs
local serialize = table.serialize
--- function tables.definetable(target) -- defines undefined tables
--- local composed, t, n = nil, { }, 0
--- for name in gmatch(target,"([^%.]+)") do
--- n = n + 1
--- if composed then
--- composed = composed .. "." .. name
--- else
--- composed = name
--- end
--- t[n] = format("%s = %s or { }",composed,composed)
--- end
--- return concat(t,"\n")
--- end
-
local splitter = lpeg.tsplitat(".")
function tables.definetable(target,nofirst,nolast) -- defines undefined tables
@@ -5036,13 +5609,13 @@ end
-- local t = tables.definedtable("a","b","c","d")
function tables.definedtable(...)
- local l = { ... }
local t = _G
- for i=1,#l do
- local tl = t[l[i]]
+ for i=1,select("#",...) do
+ local li = select(i,...)
+ local tl = t[li]
if not tl then
tl = { }
- t[l[i]] = tl
+ t[li] = tl
end
t = tl
end
@@ -5235,7 +5808,7 @@ function table.deserialize(str)
if not str or str == "" then
return
end
- local code = loadstring(str)
+ local code = load(str)
if not code then
return
end
@@ -5252,7 +5825,7 @@ function table.load(filename)
if filename then
local t = io.loaddata(filename)
if t and t ~= "" then
- t = loadstring(t)
+ t = load(t)
if type(t) == "function" then
t = t()
if type(t) == "table" then
@@ -5331,9 +5904,11 @@ utilities = utilities or { }
utilities.storage = utilities.storage or { }
local storage = utilities.storage
+local report = texio and texio.write_nl or print
+
function storage.mark(t)
if not t then
- texio.write_nl("fatal error: storage cannot be marked")
+ report("fatal error: storage cannot be marked")
return -- os.exit()
end
local m = getmetatable(t)
@@ -5363,12 +5938,36 @@ end
function storage.checked(t)
if not t then
- texio.write_nl("fatal error: storage has not been allocated")
+ report("fatal error: storage has not been allocated")
return -- os.exit()
end
return t
end
+-- function utilities.storage.delay(parent,name,filename)
+-- local m = getmetatable(parent)
+-- m.__list[name] = filename
+-- end
+--
+-- function utilities.storage.predefine(parent)
+-- local list = { }
+-- local m = getmetatable(parent) or {
+-- __list = list,
+-- __index = function(t,k)
+-- local l = require(list[k])
+-- t[k] = l
+-- return l
+-- end
+-- }
+-- setmetatable(parent,m)
+-- end
+--
+-- bla = { }
+-- utilities.storage.predefine(bla)
+-- utilities.storage.delay(bla,"test","oepsoeps")
+-- local t = bla.test
+-- table.print(t)
+-- print(t.a)
function storage.setinitializer(data,initialize)
local m = getmetatable(data) or { }
@@ -5393,12 +5992,14 @@ end
-- table namespace ?
-local function f_empty () return "" end -- t,k
-local function f_self (t,k) t[k] = k return k end
-local function f_ignore() end -- t,k,v
+local function f_empty () return "" end -- t,k
+local function f_self (t,k) t[k] = k return k end
+local function f_table (t,k) local v = { } t[k] = v return v end
+local function f_ignore() end -- t,k,v
local t_empty = { __index = f_empty }
local t_self = { __index = f_self }
+local t_table = { __index = f_table }
local t_ignore = { __newindex = f_ignore }
function table.setmetatableindex(t,f)
@@ -5408,6 +6009,8 @@ function table.setmetatableindex(t,f)
m.__index = f_empty
elseif f == "key" then
m.__index = f_self
+ elseif f == "table" then
+ m.__index = f_table
else
m.__index = f
end
@@ -5416,6 +6019,8 @@ function table.setmetatableindex(t,f)
setmetatable(t, t_empty)
elseif f == "key" then
setmetatable(t, t_self)
+ elseif f == "table" then
+ setmetatable(t, t_table)
else
setmetatable(t,{ __index = f })
end
@@ -5626,7 +6231,7 @@ if not modules then modules = { } end modules ['util-lua'] = {
}
local rep, sub, byte, dump, format = string.rep, string.sub, string.byte, string.dump, string.format
-local loadstring, loadfile, type = loadstring, loadfile, type
+local load, loadfile, type = load, loadfile, type
utilities = utilities or {}
utilities.lua = utilities.lua or { }
@@ -5643,11 +6248,23 @@ luautilities.nofstrippedbytes = 0
local strippedchunks = { } -- allocate()
luautilities.strippedchunks = strippedchunks
+luautilities.suffixes = {
+ tma = "tma",
+ tmc = jit and "tmb" or "tmc",
+ lua = "lua",
+ luc = jit and "lub" or "luc",
+ lui = "lui",
+ luv = "luv",
+ luj = "luj",
+ tua = "tua",
+ tuc = "tuc",
+}
+
local function fatalerror(name)
utilities.report(format("fatal error in %q",name or "unknown"))
end
-if jit then
+if jit or status.luatex_version >= 74 then
local function register(name)
if tracestripping then
@@ -5660,7 +6277,7 @@ if jit then
local function stupidcompile(luafile,lucfile,strip)
local code = io.loaddata(luafile)
if code and code ~= "" then
- code = loadstring(code)
+ code = load(code)
if code then
code = dump(code,strip and luautilities.stripcode or luautilities.alwaysstripcode)
if code and code ~= "" then
@@ -5692,13 +6309,13 @@ if jit then
end
if forcestrip or luautilities.alwaysstripcode then
register(name)
- return loadstring(dump(code,true)), 0
+ return load(dump(code,true)), 0
else
return code, 0
end
elseif luautilities.alwaysstripcode then
register(name)
- return loadstring(dump(code,true)), 0
+ return load(dump(code,true)), 0
else
return code, 0
end
@@ -5706,14 +6323,14 @@ if jit then
function luautilities.strippedloadstring(code,forcestrip,name) -- not executed
if forcestrip and luautilities.stripcode or luautilities.alwaysstripcode then
- code = loadstring(code)
+ code = load(code)
if not code then
fatalerror(name)
end
register(name)
code = dump(code,true)
end
- return loadstring(code), 0
+ return load(code), 0
end
function luautilities.compile(luafile,lucfile,cleanup,strip,fallback) -- defaults: cleanup=false strip=true
@@ -5754,67 +6371,79 @@ else
return delta
end
- local function strip_code_pc(dump,name)
- local before = #dump
- local version, format, endian, int, size, ins, num = byte(dump,5,11)
- local subint
- if endian == 1 then
- subint = function(dump, i, l)
- local val = 0
- for n = l, 1, -1 do
- val = val * 256 + byte(dump,i + n - 1)
+ local strip_code_pc
+
+ if _MAJORVERSION == 5 and _MINORVERSION == 1 then
+
+ strip_code_pc = function(dump,name)
+ local before = #dump
+ local version, format, endian, int, size, ins, num = byte(dump,5,11)
+ local subint
+ if endian == 1 then
+ subint = function(dump, i, l)
+ local val = 0
+ for n = l, 1, -1 do
+ val = val * 256 + byte(dump,i + n - 1)
+ end
+ return val, i + l
+ end
+ else
+ subint = function(dump, i, l)
+ local val = 0
+ for n = 1, l, 1 do
+ val = val * 256 + byte(dump,i + n - 1)
+ end
+ return val, i + l
end
- return val, i + l
end
- else
- subint = function(dump, i, l)
- local val = 0
- for n = 1, l, 1 do
- val = val * 256 + byte(dump,i + n - 1)
+ local strip_function
+ strip_function = function(dump)
+ local count, offset = subint(dump, 1, size)
+ local stripped, dirty = rep("\0", size), offset + count
+ offset = offset + count + int * 2 + 4
+ offset = offset + int + subint(dump, offset, int) * ins
+ count, offset = subint(dump, offset, int)
+ for n = 1, count do
+ local t
+ t, offset = subint(dump, offset, 1)
+ if t == 1 then
+ offset = offset + 1
+ elseif t == 4 then
+ offset = offset + size + subint(dump, offset, size)
+ elseif t == 3 then
+ offset = offset + num
+ end
end
- return val, i + l
- end
- end
- local strip_function
- strip_function = function(dump)
- local count, offset = subint(dump, 1, size)
- local stripped, dirty = rep("\0", size), offset + count
- offset = offset + count + int * 2 + 4
- offset = offset + int + subint(dump, offset, int) * ins
- count, offset = subint(dump, offset, int)
- for n = 1, count do
- local t
- t, offset = subint(dump, offset, 1)
- if t == 1 then
- offset = offset + 1
- elseif t == 4 then
- offset = offset + size + subint(dump, offset, size)
- elseif t == 3 then
- offset = offset + num
+ count, offset = subint(dump, offset, int)
+ stripped = stripped .. sub(dump,dirty, offset - 1)
+ for n = 1, count do
+ local proto, off = strip_function(sub(dump,offset, -1))
+ stripped, offset = stripped .. proto, offset + off - 1
end
+ offset = offset + subint(dump, offset, int) * int + int
+ count, offset = subint(dump, offset, int)
+ for n = 1, count do
+ offset = offset + subint(dump, offset, size) + size + int * 2
+ end
+ count, offset = subint(dump, offset, int)
+ for n = 1, count do
+ offset = offset + subint(dump, offset, size) + size
+ end
+ stripped = stripped .. rep("\0", int * 3)
+ return stripped, offset
end
- count, offset = subint(dump, offset, int)
- stripped = stripped .. sub(dump,dirty, offset - 1)
- for n = 1, count do
- local proto, off = strip_function(sub(dump,offset, -1))
- stripped, offset = stripped .. proto, offset + off - 1
- end
- offset = offset + subint(dump, offset, int) * int + int
- count, offset = subint(dump, offset, int)
- for n = 1, count do
- offset = offset + subint(dump, offset, size) + size + int * 2
- end
- count, offset = subint(dump, offset, int)
- for n = 1, count do
- offset = offset + subint(dump, offset, size) + size
- end
- stripped = stripped .. rep("\0", int * 3)
- return stripped, offset
+ dump = sub(dump,1,12) .. strip_function(sub(dump,13,-1))
+ local after = #dump
+ local delta = register(name,before,after)
+ return dump, delta
+ end
+
+ else
+
+ strip_code_pc = function(dump,name)
+ return dump, 0
end
- dump = sub(dump,1,12) .. strip_function(sub(dump,13,-1))
- local after = #dump
- local delta = register(name,before,after)
- return dump, delta
+
end
-- ... end of borrowed code.
@@ -5834,14 +6463,14 @@ else
end
if forcestrip then
local code, n = strip_code_pc(dump(code),name)
- return loadstring(code), n
+ return load(code), n
elseif luautilities.alwaysstripcode then
- return loadstring(strip_code_pc(dump(code),name))
+ return load(strip_code_pc(dump(code),name))
else
return code, 0
end
elseif luautilities.alwaysstripcode then
- return loadstring(strip_code_pc(dump(code),name))
+ return load(strip_code_pc(dump(code),name))
else
return code, 0
end
@@ -5850,20 +6479,20 @@ else
function luautilities.strippedloadstring(code,forcestrip,name) -- not executed
local n = 0
if (forcestrip and luautilities.stripcode) or luautilities.alwaysstripcode then
- code = loadstring(code)
+ code = load(code)
if not code then
fatalerror(name)
end
code, n = strip_code_pc(dump(code),name)
end
- return loadstring(code), n
+ return load(code), n
end
local function stupidcompile(luafile,lucfile,strip)
local code = io.loaddata(luafile)
local n = 0
if code and code ~= "" then
- code = loadstring(code)
+ code = load(code)
if not code then
fatalerror()
end
@@ -5903,6 +6532,7 @@ else
utilities.report("lua: %s dumped into %s (unstripped)",luafile,lucfile)
end
cleanup = false -- better see how bad it is
+ done = true -- hm
end
if done and cleanup == true and lfs.isfile(lucfile) and lfs.isfile(luafile) then
utilities.report("lua: removing %s",luafile)
@@ -5949,7 +6579,6 @@ if not modules then modules = { } end modules ['util-prs'] = {
}
local lpeg, table, string = lpeg, table, string
-
local P, R, V, S, C, Ct, Cs, Carg, Cc, Cg, Cf, Cp = lpeg.P, lpeg.R, lpeg.V, lpeg.S, lpeg.C, lpeg.Ct, lpeg.Cs, lpeg.Carg, lpeg.Cc, lpeg.Cg, lpeg.Cf, lpeg.Cp
local lpegmatch, patterns = lpeg.match, lpeg.patterns
local concat, format, gmatch, find = table.concat, string.format, string.gmatch, string.find
@@ -6367,6 +6996,45 @@ function parsers.rfc4180splitter(specification)
end
end
+-- utilities.parsers.stepper("1,7-",9,function(i) print(">>>",i) end)
+-- utilities.parsers.stepper("1-3,7,8,9")
+-- utilities.parsers.stepper("1-3,6,7",function(i) print(">>>",i) end)
+-- utilities.parsers.stepper(" 1 : 3, ,7 ")
+-- utilities.parsers.stepper("1:4,9:13,24:*",30)
+
+local function ranger(first,last,n,action)
+ if not first then
+ -- forget about it
+ elseif last == true then
+ for i=first,n or first do
+ action(i)
+ end
+ elseif last then
+ for i=first,last do
+ action(i)
+ end
+ else
+ action(first)
+ end
+end
+
+local cardinal = patterns.cardinal / tonumber
+local spacers = patterns.spacer^0
+local endofstring = patterns.endofstring
+
+local stepper = spacers * ( C(cardinal) * ( spacers * S(":-") * spacers * ( C(cardinal) + Cc(true) ) + Cc(false) )
+ * Carg(1) * Carg(2) / ranger * S(", ")^0 )^1
+
+local stepper = spacers * ( C(cardinal) * ( spacers * S(":-") * spacers * ( C(cardinal) + (P("*") + endofstring) * Cc(true) ) + Cc(false) )
+ * Carg(1) * Carg(2) / ranger * S(", ")^0 )^1 * endofstring -- we're sort of strict (could do without endofstring)
+
+function utilities.parsers.stepper(str,n,action)
+ if type(n) == "function" then
+ lpegmatch(stepper,str,1,false,n or print)
+ else
+ lpegmatch(stepper,str,1,n,action or print)
+ end
+end
end -- of closure
@@ -6817,7 +7485,7 @@ if not modules then modules = { } end modules ['trac-set'] = { -- might become u
local type, next, tostring = type, next, tostring
local concat = table.concat
-local format, find, lower, gsub, escapedpattern = string.format, string.find, string.lower, string.gsub, string.escapedpattern
+local format, find, lower, gsub, topattern = string.format, string.find, string.lower, string.gsub, string.topattern
local is_boolean = string.is_boolean
local settings_to_hash = utilities.parsers.settings_to_hash
local allocate = utilities.storage.allocate
@@ -6905,7 +7573,7 @@ local function set(t,what,newvalue)
else
value = is_boolean(value,value)
end
- w = "^" .. escapedpattern(w,true) .. "$" -- new: anchored
+ w = topattern(w,true,true)
for name, functions in next, data do
if done[name] then
-- prevent recursion due to wildcards
@@ -6959,7 +7627,8 @@ function setters.register(t,what,...)
end
end
local default = functions.default -- can be set from cnf file
- for _, fnc in next, { ... } do
+ for i=1,select("#",...) do
+ local fnc = select(i,...)
local typ = type(fnc)
if typ == "string" then
if trace_initialize then
@@ -7028,7 +7697,7 @@ function setters.show(t)
local value, default, modules = functions.value, functions.default, #functions
value = value == nil and "unset" or tostring(value)
default = default == nil and "unset" or tostring(default)
- t.report("%-50s modules: %2i default: %6s value: %6s",name,modules,default,value)
+ t.report("%-50s modules: %2i default: %-12s value: %-12s",name,modules,default,value)
end
end
t.report()
@@ -7052,17 +7721,29 @@ local function report(setter,...)
end
end
-function setters.new(name)
+local function default(setter,name)
+ local d = setter.data[name]
+ return d and d.default
+end
+
+local function value(setter,name)
+ local d = setter.data[name]
+ return d and (d.value or d.default)
+end
+
+function setters.new(name) -- we could use foo:bar syntax (but not used that often)
local setter -- we need to access it in setter itself
setter = {
data = allocate(), -- indexed, but also default and value fields
name = name,
- report = function(...) report (setter,...) end,
- enable = function(...) enable (setter,...) end,
- disable = function(...) disable (setter,...) end,
- register = function(...) register(setter,...) end,
- list = function(...) list (setter,...) end,
- show = function(...) show (setter,...) end,
+ report = function(...) report (setter,...) end,
+ enable = function(...) enable (setter,...) end,
+ disable = function(...) disable (setter,...) end,
+ register = function(...) register(setter,...) end,
+ list = function(...) list (setter,...) end,
+ show = function(...) show (setter,...) end,
+ default = function(...) return default (setter,...) end,
+ value = function(...) return value (setter,...) end,
}
data[name] = setter
return setter
@@ -7189,9 +7870,9 @@ if not modules then modules = { } end modules ['trac-log'] = {
local write_nl, write = texio and texio.write_nl or print, texio and texio.write or io.write
local format, gmatch, find = string.format, string.gmatch, string.find
local concat, insert, remove = table.concat, table.insert, table.remove
-local escapedpattern = string.escapedpattern
+local topattern = string.topattern
local texcount = tex and tex.count
-local next, type = next, type
+local next, type, select = next, type, select
local setmetatableindex = table.setmetatableindex
@@ -7502,7 +8183,7 @@ local function setblocked(category,value)
if data[c] then
v.state = value
else
- c = escapedpattern(c,true)
+ c = topattern(c,true,true)
for k, v in next, data do
if find(k,c) then
v.state = value
@@ -7720,10 +8401,10 @@ local function reporthelp(t,...)
if type(helpinfo) == "string" then
reportlines(t,helpinfo)
elseif type(helpinfo) == "table" then
- local tags = { ... }
- for i=1,#tags do
- reportlines(t,t.helpinfo[tags[i]])
- if i < #tags then
+ local n = select("#",...)
+ for i=1,n do
+ reportlines(t,t.helpinfo[select(i,...)])
+ if i < n then
t.report()
end
end
@@ -8013,6 +8694,10 @@ local format, sub, match, gsub, find = string.format, string.sub, string.match,
local unquoted, quoted = string.unquoted, string.quoted
local concat, insert, remove = table.concat, table.insert, table.remove
local loadedluacode = utilities.lua.loadedluacode
+local luasuffixes = utilities.lua.suffixes
+
+environment = environment or { }
+local environment = environment
-- precautions
@@ -8022,9 +8707,29 @@ function os.setlocale()
-- no way you can mess with it
end
--- dirty tricks
+-- dirty tricks (we will replace the texlua call by luatex --luaonly)
-if arg and (arg[0] == 'luatex' or arg[0] == 'luatex.exe') and arg[1] == "--luaonly" then
+local validengines = allocate {
+ ["luatex"] = true,
+ ["luajittex"] = true,
+ -- ["luatex.exe"] = true,
+ -- ["luajittex.exe"] = true,
+}
+
+local basicengines = allocate {
+ ["luatex"] = "luatex",
+ ["texlua"] = "luatex",
+ ["texluac"] = "luatex",
+ ["luajittex"] = "luajittex",
+ ["texluajit"] = "luajittex",
+ -- ["texlua.exe"] = "luatex",
+ -- ["texluajit.exe"] = "luajittex",
+}
+
+environment.validengines = validengines
+environment.basicengines = basicengines
+
+if arg and validengines[file.removesuffix(arg[0])] and arg[1] == "--luaonly" then
arg[-1] = arg[0]
arg[ 0] = arg[2]
for k=3,#arg do
@@ -8056,9 +8761,6 @@ end
-- environment
-environment = environment or { }
-local environment = environment
-
environment.arguments = allocate()
environment.files = allocate()
environment.sortedflags = nil
@@ -8114,7 +8816,7 @@ function environment.initializearguments(arg)
end
end
end
- environment.ownname = environment.ownname or arg[0] or 'unknown.lua'
+ environment.ownname = file.reslash(environment.ownname or arg[0] or 'unknown.lua')
end
function environment.setargument(name,value)
@@ -8195,6 +8897,22 @@ function environment.reconstructcommandline(arg,noquote)
end
end
+-- -- to be tested:
+--
+-- function environment.reconstructcommandline(arg,noquote)
+-- arg = arg or environment.originalarguments
+-- if noquote and #arg == 1 then
+-- return unquoted(resolvers.resolve(arg[1]))
+-- elseif #arg > 0 then
+-- local result = { }
+-- for i=1,#arg do
+-- result[#result+1] = format("%q",unquoted(resolvers.resolve(arg[i]))) -- always quote
+-- end
+-- return concat(result," ")
+-- else
+-- return ""
+-- end
+-- end
if arg then
@@ -8289,9 +9007,11 @@ function environment.loadluafile(filename, version)
local lucname, luaname, chunk
local basename = file.removesuffix(filename)
if basename == filename then
- lucname, luaname = basename .. ".luc", basename .. ".lua"
+ luaname = fiule.addsuffix(basename,luasuffixes.lua)
+ lucname = fiule.addsuffix(basename,luasuffixes.luc)
else
- lucname, luaname = nil, basename -- forced suffix
+ luaname = basename -- forced suffix
+ lucname = nil
end
-- when not overloaded by explicit suffix we look for a luc file first
local fullname = (lucname and environment.luafile(lucname)) or ""
@@ -8372,7 +9092,6 @@ xml = xml or { }
local xml = xml
-local utf = unicode.utf8
local concat, remove, insert = table.concat, table.remove, table.insert
local type, next, setmetatable, getmetatable, tonumber = type, next, setmetatable, getmetatable, tonumber
local format, lower, find, match, gsub = string.format, string.lower, string.find, string.match, string.gsub
@@ -9583,7 +10302,7 @@ if not modules then modules = { } end modules ['lxml-lpt'] = {
-- todo: B/C/[get first match]
local concat, remove, insert = table.concat, table.remove, table.insert
-local type, next, tonumber, tostring, setmetatable, loadstring = type, next, tonumber, tostring, setmetatable, loadstring
+local type, next, tonumber, tostring, setmetatable, load, select = type, next, tonumber, tostring, setmetatable, load, select
local format, upper, lower, gmatch, gsub, find, rep = string.format, string.upper, string.lower, string.gmatch, string.gsub, string.find, string.rep
local lpegmatch, lpegpatterns = lpeg.match, lpeg.patterns
@@ -10195,7 +10914,7 @@ end
local function register_expression(expression)
local converted = lpegmatch(converter,expression)
- local runner = loadstring(format(template_e,converted))
+ local runner = load(format(template_e,converted))
runner = (runner and runner()) or function() errorrunner_e(expression,converted) end
return { kind = "expression", expression = expression, converted = converted, evaluator = runner }
end
@@ -10203,9 +10922,9 @@ end
local function register_finalizer(protocol,name,arguments)
local runner
if arguments and arguments ~= "" then
- runner = loadstring(format(template_f_y,protocol or xml.defaultprotocol,name,arguments))
+ runner = load(format(template_f_y,protocol or xml.defaultprotocol,name,arguments))
else
- runner = loadstring(format(template_f_n,protocol or xml.defaultprotocol,name))
+ runner = load(format(template_f_n,protocol or xml.defaultprotocol,name))
end
runner = (runner and runner()) or function() errorrunner_f(name,arguments) end
return { kind = "finalizer", name = name, arguments = arguments, finalizer = runner }
@@ -10597,6 +11316,7 @@ end
expressions.child = function(e,pattern)
return applylpath(e,pattern) -- todo: cache
end
+
expressions.count = function(e,pattern) -- what if pattern == empty or nil
local collected = applylpath(e,pattern) -- todo: cache
return pattern and (collected and #collected) or 0
@@ -10604,13 +11324,30 @@ end
-- external
-expressions.oneof = function(s,...) -- slow
- local t = {...} for i=1,#t do if s == t[i] then return true end end return false
+-- expressions.oneof = function(s,...)
+-- local t = {...}
+-- for i=1,#t do
+-- if s == t[i] then
+-- return true
+-- end
+-- end
+-- return false
+-- end
+
+expressions.oneof = function(s,...)
+ for i=1,select("#",...) do
+ if s == select(i,...) then
+ return true
+ end
+ end
+ return false
end
+
expressions.error = function(str)
xml.errorhandler(format("unknown function in lpath expression: %s",tostring(str or "?")))
return false
end
+
expressions.undefined = function(s)
return s == nil
end
@@ -12220,7 +12957,6 @@ if not modules then modules = { } end modules ['data-ini'] = {
}
local gsub, find, gmatch, char = string.gsub, string.find, string.gmatch, string.char
-local concat = table.concat
local next, type = next, type
local filedirname, filebasename, filejoin = file.dirname, file.basename, file.join
@@ -12311,6 +13047,10 @@ do
local args = environment.originalarguments or arg -- this needs a cleanup
+ if not environment.ownmain then
+ environment.ownmain = status and string.match(string.lower(status.banner),"this is ([%a]+)") or "luatex"
+ end
+
local ownbin = environment.ownbin or args[-2] or arg[-2] or args[-1] or arg[-1] or arg[0] or "luatex"
local ownpath = environment.ownpath or os.selfdir
@@ -12427,19 +13167,6 @@ end
environment.texroot = file.collapsepath(texroot)
--- Tracing. Todo ...
-
-function resolvers.settrace(n) -- no longer number but: 'locating' or 'detail'
- if n then
- trackers.disable("resolvers.*")
- trackers.enable("resolvers."..n)
- end
-end
-
-resolvers.settrace(osgetenv("MTX_INPUT_TRACE"))
-
--- todo:
-
if profiler then
directives.register("system.profile",function()
profiler.start("luatex-profile.log")
@@ -12946,6 +13673,8 @@ resolvers.suffixes = suffixes
resolvers.dangerous = dangerous
resolvers.suffixmap = suffixmap
+local luasuffixes = utilities.lua.suffixes
+
local relations = allocate { -- todo: handlers also here
core = {
ofm = { -- will become obsolete
@@ -13031,7 +13760,7 @@ local relations = allocate { -- todo: handlers also here
lua = {
names = { "lua" },
variable = 'LUAINPUTS',
- suffixes = { 'lua', 'luc', 'tma', 'tmc' },
+ suffixes = { luasuffixes.lua, luasuffixes.luc, luasuffixes.tma, luasuffixes.tmc },
},
lib = {
names = { "lib" },
@@ -13227,6 +13956,7 @@ if not modules then modules = { } end modules ['data-tmp'] = {
local format, lower, gsub, concat = string.format, string.lower, string.gsub, table.concat
local serialize, serializetofile = table.serialize, table.tofile
local mkdirs, isdir = dir.mkdirs, lfs.isdir
+local addsuffix, is_writable, is_readable = file.addsuffix, file.is_writable, file.is_readable
local trace_locating = false trackers.register("resolvers.locating", function(v) trace_locating = v end)
local trace_cache = false trackers.register("resolvers.cache", function(v) trace_cache = v end)
@@ -13251,8 +13981,10 @@ end
-- end of intermezzo
-caches = caches or { }
-local caches = caches
+caches = caches or { }
+local caches = caches
+
+local luasuffixes = utilities.lua.suffixes
caches.base = caches.base or "luatex-cache"
caches.more = caches.more or "context"
@@ -13280,18 +14012,18 @@ local function identify()
cachepath = file.collapsepath(cachepath)
local valid = isdir(cachepath)
if valid then
- if file.is_readable(cachepath) then
+ if is_readable(cachepath) then
readables[#readables+1] = cachepath
- if not writable and file.is_writable(cachepath) then
+ if not writable and is_writable(cachepath) then
writable = cachepath
end
end
elseif not writable and caches.force then
local cacheparent = file.dirname(cachepath)
- if file.is_writable(cacheparent) and true then -- we go on anyway (needed for mojca's kind of paths)
+ if is_writable(cacheparent) and true then -- we go on anyway (needed for mojca's kind of paths)
if not caches.ask or io.ask(format("\nShould I create the cache path %s?",cachepath), "no", { "yes", "no" }) == "yes" then
mkdirs(cachepath)
- if isdir(cachepath) and file.is_writable(cachepath) then
+ if isdir(cachepath) and is_writable(cachepath) then
report_caches("created: %s",cachepath)
writable = cachepath
readables[#readables+1] = cachepath
@@ -13313,8 +14045,8 @@ local function identify()
cachepath = resolvers.resolve(cachepath)
cachepath = resolvers.cleanpath(cachepath)
local valid = isdir(cachepath)
- if valid and file.is_readable(cachepath) then
- if not writable and file.is_writable(cachepath) then
+ if valid and is_readable(cachepath) then
+ if not writable and is_writable(cachepath) then
readables[#readables+1] = cachepath
writable = cachepath
break
@@ -13403,7 +14135,7 @@ end
local r_cache, w_cache = { }, { } -- normally w in in r but who cares
-local function getreadablepaths(...) -- we can optimize this as we have at most 2 tags
+local function getreadablepaths(...)
local tags = { ... }
local hash = concat(tags,"/")
local done = r_cache[hash]
@@ -13446,7 +14178,7 @@ function caches.getfirstreadablefile(filename,...)
for i=1,#rd do
local path = rd[i]
local fullname = file.join(path,filename)
- if file.is_readable(fullname) then
+ if is_readable(fullname) then
usedreadables[i] = true
return fullname, path
end
@@ -13467,7 +14199,7 @@ function caches.define(category,subcategory) -- for old times sake
end
function caches.setluanames(path,name)
- return path .. "/" .. name .. ".tma", path .. "/" .. name .. ".tmc"
+ return format("%s/%s.%s",path,name,luasuffixes.tma), format("%s/%s.%s",path,name,luasuffixes.tmc)
end
function caches.loaddata(readables,name)
@@ -13477,7 +14209,13 @@ function caches.loaddata(readables,name)
for i=1,#readables do
local path = readables[i]
local tmaname, tmcname = caches.setluanames(path,name)
- local loader = loadfile(tmcname) or loadfile(tmaname)
+ local loader = loadfile(tmcname)
+ if not loader then
+ -- in case we have a different engine
+ utilities.lua.compile(tmaname,tmcname)
+ --
+ loader = loadfile(tmaname)
+ end
if loader then
loader = loader()
collectgarbage("step")
@@ -13489,11 +14227,15 @@ end
function caches.is_writable(filepath,filename)
local tmaname, tmcname = caches.setluanames(filepath,filename)
- return file.is_writable(tmaname)
+ return is_writable(tmaname)
end
local saveoptions = { compact = true }
+-- add some point we will only use the internal bytecode compiler and
+-- then we can flag success in the tma so that it can trigger a compile
+-- if the other engine
+
function caches.savedata(filepath,filename,data,raw)
local tmaname, tmcname = caches.setluanames(filepath,filename)
local reduce, simplify = true, true
@@ -13519,9 +14261,9 @@ end
function caches.loadcontent(cachename,dataname)
local name = caches.hashed(cachename)
- local full, path = caches.getfirstreadablefile(name ..".lua","trees")
+ local full, path = caches.getfirstreadablefile(addsuffix(name,luasuffixes.lua),"trees")
local filename = file.join(path,name)
- local blob = loadfile(filename .. ".luc") or loadfile(filename .. ".lua")
+ local blob = loadfile(addsuffix(filename,luasuffixes.luc)) or loadfile(addsuffix(filename,luasuffixes.lua))
if blob then
local data = blob()
if data and data.content then
@@ -13556,9 +14298,10 @@ end
function caches.savecontent(cachename,dataname,content)
local name = caches.hashed(cachename)
- local full, path = caches.setfirstwritablefile(name ..".lua","trees")
+ local full, path = caches.setfirstwritablefile(addsuffix(name,luasuffixes.lua),"trees")
local filename = file.join(path,name) -- is full
- local luaname, lucname = filename .. ".lua", filename .. ".luc"
+ local luaname = addsuffix(filename,luasuffixes.lua)
+ local lucname = addsuffix(filename,luasuffixes.luc)
if trace_locating then
report_resolvers("preparing '%s' for '%s'",dataname,cachename)
end
@@ -13763,6 +14506,7 @@ local joinpath = file.joinpath
local allocate = utilities.storage.allocate
local settings_to_array = utilities.parsers.settings_to_array
local setmetatableindex = table.setmetatableindex
+local luasuffixes = utilities.lua.suffixes
local trace_locating = false trackers.register("resolvers.locating", function(v) trace_locating = v end)
local trace_detail = false trackers.register("resolvers.details", function(v) trace_detail = v end)
@@ -15325,15 +16069,19 @@ function resolvers.dowithvariable(name,func)
end
function resolvers.locateformat(name)
- local barename = file.removesuffix(name) -- gsub(name,"%.%a+$","")
- local fmtname = caches.getfirstreadablefile(barename..".fmt","formats") or ""
+ local engine = environment.ownmain or "luatex"
+ local barename = file.removesuffix(name)
+ local fullname = file.addsuffix(barename,"fmt")
+ local fmtname = caches.getfirstreadablefile(fullname,"formats",engine) or ""
if fmtname == "" then
- fmtname = resolvers.findfile(barename..".fmt")
+ fmtname = resolvers.findfile(fullname)
fmtname = resolvers.cleanpath(fmtname)
end
if fmtname ~= "" then
local barename = file.removesuffix(fmtname)
- local luaname, lucname, luiname = barename .. ".lua", barename .. ".luc", barename .. ".lui"
+ local luaname = file.addsuffix(barename,luasuffixes.lua)
+ local lucname = file.addsuffix(barename,luasuffixes.luc)
+ local luiname = file.addsuffix(barename,luasuffixes.lui)
if lfs.isfile(luiname) then
return barename, luiname
elseif lfs.isfile(lucname) then
@@ -15430,10 +16178,9 @@ local resolvers = resolvers
local prefixes = utilities.storage.allocate()
resolvers.prefixes = prefixes
-local gsub = string.gsub
local cleanpath, findgivenfile, expansion = resolvers.cleanpath, resolvers.findgivenfile, resolvers.expansion
local getenv = resolvers.getenv -- we can probably also use resolvers.expansion
-local P, Cs, lpegmatch = lpeg.P, lpeg.Cs, lpeg.match
+local P, S, R, C, Cs, lpegmatch = lpeg.P, lpeg.S, lpeg.R, lpeg.C, lpeg.Cs, lpeg.match
local joinpath, basename, dirname = file.join, file.basename, file.dirname
local getmetatable, rawset, type = getmetatable, rawset, type
@@ -15555,6 +16302,28 @@ end
-- todo: use an lpeg (see data-lua for !! / stripper)
+-- local function resolve(str) -- use schemes, this one is then for the commandline only
+-- if type(str) == "table" then
+-- local t = { }
+-- for i=1,#str do
+-- t[i] = resolve(str[i])
+-- end
+-- return t
+-- else
+-- local res = resolved[str]
+-- if not res then
+-- res = gsub(str,"([a-z][a-z]+):([^ \"\';,]*)",_resolve_) -- home:xx;selfautoparent:xx; etc (comma added)
+-- resolved[str] = res
+-- abstract[res] = str
+-- end
+-- return res
+-- end
+-- end
+
+-- home:xx;selfautoparent:xx;
+
+local pattern = Cs((C(R("az")^2) * P(":") * C((1-S(" \"\';,"))^1) / _resolve_ + P(1))^0)
+
local function resolve(str) -- use schemes, this one is then for the commandline only
if type(str) == "table" then
local t = { }
@@ -15565,7 +16334,7 @@ local function resolve(str) -- use schemes, this one is then for the commandline
else
local res = resolved[str]
if not res then
- res = gsub(str,"([a-z][a-z]+):([^ \"\';,]*)",_resolve_) -- home:xx;selfautoparent:xx; etc (comma added)
+ res = lpegmatch(pattern,str)
resolved[str] = res
abstract[res] = str
end
@@ -15998,7 +16767,7 @@ statistics.register("used cache path", function() return caches.usedpaths() end
function statistics.savefmtstatus(texname,formatbanner,sourcefile) -- texname == formatname
local enginebanner = status.list().banner
if formatbanner and enginebanner and sourcefile then
- local luvname = file.replacesuffix(texname,"luv")
+ local luvname = file.replacesuffix(texname,"luv") -- utilities.lua.suffixes.luv
local luvdata = {
enginebanner = enginebanner,
formatbanner = formatbanner,
@@ -16009,10 +16778,14 @@ function statistics.savefmtstatus(texname,formatbanner,sourcefile) -- texname ==
end
end
+-- todo: check this at startup and return (say) 999 as signal that the run
+-- was aborted due to a wrong format in which case mtx-context can trigger
+-- a remake
+
function statistics.checkfmtstatus(texname)
local enginebanner = status.list().banner
if enginebanner and texname then
- local luvname = file.replacesuffix(texname,"luv")
+ local luvname = file.replacesuffix(texname,"luv") -- utilities.lua.suffixes.luv
if lfs.isfile(luvname) then
local luv = dofile(luvname)
if luv and luv.sourcefile then
@@ -16389,7 +17162,7 @@ if not modules then modules = { } end modules ['data-sch'] = {
license = "see context related readme files"
}
-local loadstring = loadstring
+local load = load
local gsub, concat, format = string.gsub, table.concat, string.format
local finders, openers, loaders = resolvers.finders, resolvers.openers, resolvers.loaders
@@ -16575,7 +17348,7 @@ schemes.fetchstring = fetchstring
function schemes.fetchtable(url,data)
local reply = fetchstring(url,data)
if reply then
- local s = loadstring("return " .. reply)
+ local s = load("return " .. reply)
if s then
return s()
end
@@ -16603,6 +17376,8 @@ if not modules then modules = { } end modules ['data-lua'] = {
-- -- local mylib = require("libtest")
-- -- local mysql = require("luasql.mysql")
+local searchers = package.searchers or package.loaders
+
local concat = table.concat
local trace_libraries = false
@@ -16704,13 +17479,9 @@ function package.extraclibpath(...)
end
end
-if not package.loaders then
- package.loaders = package.searchers -- 5.2
-end
-
-if not package.loaders[-2] then
+if not searchers[-2] then
-- use package-path and package-cpath
- package.loaders[-2] = package.loaders[2]
+ searchers[-2] = searchers[2]
end
local function loadedaslib(resolved,rawname)
@@ -16721,7 +17492,7 @@ local function loadedbylua(name)
if trace_libraries then
report_libraries("! locating %q using normal loader",name)
end
- local resolved = package.loaders[-2](name)
+ local resolved = searchers[-2](name)
end
local function loadedbyformat(name,rawname,suffixes,islib)
@@ -16776,7 +17547,7 @@ local function notloaded(name)
end
end
-package.loaders[2] = function(name)
+searchers[2] = function(name)
local thename = gsub(name,"%.","/")
local luaname = file.addsuffix(thename,"lua")
local libname = file.addsuffix(thename,os.libsuffix)
@@ -16790,8 +17561,8 @@ package.loaders[2] = function(name)
or notloaded (name)
end
--- package.loaders[3] = nil
--- package.loaders[4] = nil
+-- searchers[3] = nil
+-- searchers[4] = nil
resolvers.loadlualib = require
@@ -17161,13 +17932,11 @@ if not modules then modules = { } end modules ['luat-fmt'] = {
local format = string.format
+local quoted = string.quoted
+local luasuffixes = utilities.lua.suffixes
local report_format = logs.reporter("resolvers","formats")
--- helper for mtxrun
-
-local quoted = string.quoted
-
local function primaryflags() -- not yet ok
local trackers = environment.argument("trackers")
local directives = environment.argument("directives")
@@ -17182,13 +17951,14 @@ local function primaryflags() -- not yet ok
end
function environment.make_format(name)
+ local engine = environment.ownmain or "luatex"
-- change to format path (early as we need expanded paths)
- local olddir = lfs.currentdir()
- local path = caches.getwritablepath("formats") or "" -- maybe platform
+ local olddir = dir.current()
+ local path = caches.getwritablepath("formats",engine) or "" -- maybe platform
if path ~= "" then
lfs.chdir(path)
end
- report_format("format path: %s",lfs.currentdir())
+ report_format("format path: %s",dir.current())
-- check source file
local texsourcename = file.addsuffix(name,"mkiv")
local fulltexsourcename = resolvers.findfile(texsourcename,"tex") or ""
@@ -17225,13 +17995,12 @@ function environment.make_format(name)
elseif type(usedlualibs) == "table" then
report_format("using stub specification: %s",fullspecificationname)
local texbasename = file.basename(name)
- local luastubname = file.addsuffix(texbasename,"lua")
- local lucstubname = file.addsuffix(texbasename,"luc")
+ local luastubname = file.addsuffix(texbasename,luasuffixes.lua)
+ local lucstubname = file.addsuffix(texbasename,luasuffixes.luc)
-- pack libraries in stub
report_format("creating initialization file: %s",luastubname)
utilities.merger.selfcreate(usedlualibs,specificationpath,luastubname)
-- compile stub file (does not save that much as we don't use this stub at startup any more)
- local strip = resolvers.booleanvariable("LUACSTRIP", true)
if utilities.lua.compile(luastubname,lucstubname) and lfs.isfile(lucstubname) then
report_format("using compiled initialization file: %s",lucstubname)
usedluastub = lucstubname
@@ -17245,7 +18014,7 @@ function environment.make_format(name)
return
end
-- generate format
- local command = format("luatex --ini %s --lua=%s %s %sdump",primaryflags(),quoted(usedluastub),quoted(fulltexsourcename),os.platform == "unix" and "\\\\" or "\\")
+ local command = format("%s --ini %s --lua=%s %s %sdump",engine,primaryflags(),quoted(usedluastub),quoted(fulltexsourcename),os.platform == "unix" and "\\\\" or "\\")
report_format("running command: %s\n",command)
os.spawn(command)
-- remove related mem files
@@ -17264,8 +18033,9 @@ end
function environment.run_format(name,data,more)
if name and name ~= "" then
+ local engine = environment.ownmain or "luatex"
local barename = file.removesuffix(name)
- local fmtname = caches.getfirstreadablefile(file.addsuffix(barename,"fmt"),"formats")
+ local fmtname = caches.getfirstreadablefile(file.addsuffix(barename,"fmt"),"formats",engine)
if fmtname == "" then
fmtname = resolvers.findfile(file.addsuffix(barename,"fmt")) or ""
end
@@ -17282,7 +18052,7 @@ function environment.run_format(name,data,more)
report_format("using format name: %s",fmtname)
report_format("no luc/lua with name: %s",barename)
else
- local command = format("luatex %s --fmt=%s --lua=%s %s %s",primaryflags(),quoted(barename),quoted(luaname),quoted(data),more ~= "" and quoted(more) or "")
+ local command = format("%s %s --fmt=%s --lua=%s %s %s",engine,primaryflags(),quoted(barename),quoted(luaname),quoted(data),more ~= "" and quoted(more) or "")
report_format("running command: %s",command)
os.spawn(command)
end
@@ -17423,10 +18193,11 @@ own = { } -- not local, might change
own.libs = { -- order can be made better
+ 'l-lua.lua',
+ 'l-lpeg.lua',
'l-function.lua',
'l-string.lua',
'l-table.lua',
- 'l-lpeg.lua',
'l-io.lua',
'l-number.lua',
'l-set.lua',
@@ -17656,6 +18427,10 @@ local helpinfo = [[
--variables show configuration variables
--configurations show configuration order
+--directives show (known) directives
+--trackers show (known) trackers
+--experiments show (known) experiments
+
--expand-braces expand complex variable
--expand-path expand variable (resolve paths)
--expand-var expand variable (resolve references)
@@ -18346,8 +19121,18 @@ else
end
+if e_argument("script") or e_argument("scripts") then
+
+ -- run a script by loading it (using libs), pass args
-if e_argument("selfmerge") then
+ runners.loadbase()
+ if is_mkii_stub then
+ ok = runners.execute_script(filename,false,true)
+ else
+ ok = runners.execute_ctx_script(filename)
+ end
+
+elseif e_argument("selfmerge") then
-- embed used libraries
@@ -18377,17 +19162,6 @@ elseif e_argument("ctxlua") or e_argument("internal") then
runners.loadbase()
ok = runners.execute_script(filename,true)
-elseif e_argument("script") or e_argument("scripts") then
-
- -- run a script by loading it (using libs), pass args
-
- runners.loadbase()
- if is_mkii_stub then
- ok = runners.execute_script(filename,false,true)
- else
- ok = runners.execute_ctx_script(filename)
- end
-
elseif e_argument("execute") then
-- execute script
@@ -18615,6 +19389,18 @@ elseif e_argument("version") then
application.version()
+elseif e_argument("directives") then
+
+ directives.show()
+
+elseif e_argument("trackers") then
+
+ trackers.show()
+
+elseif e_argument("experiments") then
+
+ experiments.show()
+
elseif e_argument("help") or filename=='help' or filename == "" then
application.help()
diff --git a/scripts/context/stubs/mswin/mtxrun.lua b/scripts/context/stubs/mswin/mtxrun.lua
index 01c601eb5..3a02ad582 100644
--- a/scripts/context/stubs/mswin/mtxrun.lua
+++ b/scripts/context/stubs/mswin/mtxrun.lua
@@ -1,5 +1,16 @@
#!/usr/bin/env texlua
+-- for k, v in next, _G.string do
+-- local tv = type(v)
+-- if tv == "table" then
+-- for kk, vv in next, v do
+-- print(k,kk,vv)
+-- end
+-- else
+-- print(tv,k,v)
+-- end
+-- end
+
if not modules then modules = { } end modules ['mtxrun'] = {
version = 1.001,
comment = "runner, lua replacement for texmfstart.rb",
@@ -43,7 +54,7 @@ if not modules then modules = { } end modules ['mtxrun'] = {
do -- create closure to overcome 200 locals limit
-if not modules then modules = { } end modules ['l-functions'] = {
+if not modules then modules = { } end modules ['l-lua'] = {
version = 1.001,
comment = "companion to luat-lib.mkiv",
author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
@@ -51,16 +62,112 @@ if not modules then modules = { } end modules ['l-functions'] = {
license = "see context related readme files"
}
-functions = functions or { }
+-- compatibility hacks ... try to avoid usage
-function functions.dummy() end
+local major, minor = string.match(_VERSION,"^[^%d]+(%d+)%.(%d+).*$")
+
+_MAJORVERSION = tonumber(major) or 5
+_MINORVERSION = tonumber(minor) or 1
+
+-- basics:
+
+if loadstring then
+
+ local loadnormal = load
+
+ function load(first,...)
+ if type(first) == "string" then
+ return loadstring(first,...)
+ else
+ return loadnormal(first,...)
+ end
+ end
+
+else
+
+ loadstring = load
+
+end
+
+-- table:
+
+-- Starting with version 5.2 Lua no longer provide ipairs, which makes
+-- sense. As we already used the for loop and # in most places the
+-- impact on ConTeXt was not that large; the remaining ipairs already
+-- have been replaced. In a similar fashion we also hardly used pairs.
+--
+-- Hm, actually ipairs was retained, but we no longer use it anyway.
+--
+-- Just in case, we provide the fallbacks as discussed in Programming
+-- in Lua (http://www.lua.org/pil/7.3.html):
+
+if not ipairs then
+
+ -- for k, v in ipairs(t) do ... end
+ -- for k=1,#t do local v = t[k] ... end
+
+ local function iterate(a,i)
+ i = i + 1
+ local v = a[i]
+ if v ~= nil then
+ return i, v --, nil
+ end
+ end
+
+ function ipairs(a)
+ return iterate, a, 0
+ end
+
+end
+
+if not pairs then
+
+ -- for k, v in pairs(t) do ... end
+ -- for k, v in next, t do ... end
+
+ function pairs(t)
+ return next, t -- , nil
+ end
+
+end
+
+-- The unpack function has been moved to the table table, and for compatiility
+-- reasons we provide both now.
+
+if not table.unpack then
+
+ table.unpack = _G.unpack
+
+elseif not unpack then
+
+ _G.unpack = table.unpack
+
+end
+
+-- package:
+
+-- if not package.seachers then
+--
+-- package.searchers = package.loaders -- 5.2
+--
+-- elseif not package.loaders then
+--
+-- package.loaders = package.searchers
+--
+-- end
+
+if not package.loaders then -- brr, searchers is a special "loadlib function" userdata type
+
+ package.loaders = package.searchers
+
+end
end -- of closure
do -- create closure to overcome 200 locals limit
-if not modules then modules = { } end modules ['l-string'] = {
+if not modules then modules = { } end modules ['l-lpeg'] = {
version = 1.001,
comment = "companion to luat-lib.mkiv",
author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
@@ -68,34 +175,843 @@ if not modules then modules = { } end modules ['l-string'] = {
license = "see context related readme files"
}
-local string = string
-local sub, gsub, find, match, gmatch, format, char, byte, rep, lower = string.sub, string.gsub, string.find, string.match, string.gmatch, string.format, string.char, string.byte, string.rep, string.lower
-local lpegmatch, S, C, Ct = lpeg.match, lpeg.S, lpeg.C, lpeg.Ct
+-- a new lpeg fails on a #(1-P(":")) test and really needs a + P(-1)
--- some functions may disappear as they are not used anywhere
+-- move utf -> l-unicode
+-- move string -> l-string or keep it here
-if not string.split then
+local lpeg = require("lpeg")
- -- this will be overloaded by a faster lpeg variant
+-- tracing (only used when we encounter a problem in integration of lpeg in luatex)
- function string.split(str,pattern)
- local t = { }
- if #str > 0 then
- local n = 1
- for s in gmatch(str..pattern,"(.-)"..pattern) do
- t[n] = s
- n = n + 1
+-- some code will move to unicode and string
+
+local report = texio and texio.write_nl or print
+
+-- local lpmatch = lpeg.match
+-- local lpprint = lpeg.print
+-- local lpp = lpeg.P
+-- local lpr = lpeg.R
+-- local lps = lpeg.S
+-- local lpc = lpeg.C
+-- local lpb = lpeg.B
+-- local lpv = lpeg.V
+-- local lpcf = lpeg.Cf
+-- local lpcb = lpeg.Cb
+-- local lpcg = lpeg.Cg
+-- local lpct = lpeg.Ct
+-- local lpcs = lpeg.Cs
+-- local lpcc = lpeg.Cc
+-- local lpcmt = lpeg.Cmt
+-- local lpcarg = lpeg.Carg
+
+-- function lpeg.match(l,...) report("LPEG MATCH") lpprint(l) return lpmatch(l,...) end
+
+-- function lpeg.P (l) local p = lpp (l) report("LPEG P =") lpprint(l) return p end
+-- function lpeg.R (l) local p = lpr (l) report("LPEG R =") lpprint(l) return p end
+-- function lpeg.S (l) local p = lps (l) report("LPEG S =") lpprint(l) return p end
+-- function lpeg.C (l) local p = lpc (l) report("LPEG C =") lpprint(l) return p end
+-- function lpeg.B (l) local p = lpb (l) report("LPEG B =") lpprint(l) return p end
+-- function lpeg.V (l) local p = lpv (l) report("LPEG V =") lpprint(l) return p end
+-- function lpeg.Cf (l) local p = lpcf (l) report("LPEG Cf =") lpprint(l) return p end
+-- function lpeg.Cb (l) local p = lpcb (l) report("LPEG Cb =") lpprint(l) return p end
+-- function lpeg.Cg (l) local p = lpcg (l) report("LPEG Cg =") lpprint(l) return p end
+-- function lpeg.Ct (l) local p = lpct (l) report("LPEG Ct =") lpprint(l) return p end
+-- function lpeg.Cs (l) local p = lpcs (l) report("LPEG Cs =") lpprint(l) return p end
+-- function lpeg.Cc (l) local p = lpcc (l) report("LPEG Cc =") lpprint(l) return p end
+-- function lpeg.Cmt (l) local p = lpcmt (l) report("LPEG Cmt =") lpprint(l) return p end
+-- function lpeg.Carg (l) local p = lpcarg(l) report("LPEG Carg =") lpprint(l) return p end
+
+local type, next = type, next
+local byte, char, gmatch, format = string.byte, string.char, string.gmatch, string.format
+
+-- Beware, we predefine a bunch of patterns here and one reason for doing so
+-- is that we get consistent behaviour in some of the visualizers.
+
+lpeg.patterns = lpeg.patterns or { } -- so that we can share
+local patterns = lpeg.patterns
+
+local P, R, S, V, Ct, C, Cs, Cc, Cp, Cmt = lpeg.P, lpeg.R, lpeg.S, lpeg.V, lpeg.Ct, lpeg.C, lpeg.Cs, lpeg.Cc, lpeg.Cp, lpeg.Cmt
+local lpegtype, lpegmatch = lpeg.type, lpeg.match
+
+local anything = P(1)
+local endofstring = P(-1)
+local alwaysmatched = P(true)
+
+patterns.anything = anything
+patterns.endofstring = endofstring
+patterns.beginofstring = alwaysmatched
+patterns.alwaysmatched = alwaysmatched
+
+local digit, sign = R('09'), S('+-')
+local cr, lf, crlf = P("\r"), P("\n"), P("\r\n")
+local newline = crlf + S("\r\n") -- cr + lf
+local escaped = P("\\") * anything
+local squote = P("'")
+local dquote = P('"')
+local space = P(" ")
+
+local utfbom_32_be = P('\000\000\254\255')
+local utfbom_32_le = P('\255\254\000\000')
+local utfbom_16_be = P('\255\254')
+local utfbom_16_le = P('\254\255')
+local utfbom_8 = P('\239\187\191')
+local utfbom = utfbom_32_be + utfbom_32_le
+ + utfbom_16_be + utfbom_16_le
+ + utfbom_8
+local utftype = utfbom_32_be * Cc("utf-32-be") + utfbom_32_le * Cc("utf-32-le")
+ + utfbom_16_be * Cc("utf-16-be") + utfbom_16_le * Cc("utf-16-le")
+ + utfbom_8 * Cc("utf-8") + alwaysmatched * Cc("utf-8") -- assume utf8
+local utfoffset = utfbom_32_be * Cc(4) + utfbom_32_le * Cc(4)
+ + utfbom_16_be * Cc(2) + utfbom_16_le * Cc(2)
+ + utfbom_8 * Cc(3) + Cc(0)
+
+local utf8next = R("\128\191")
+
+patterns.utf8one = R("\000\127")
+patterns.utf8two = R("\194\223") * utf8next
+patterns.utf8three = R("\224\239") * utf8next * utf8next
+patterns.utf8four = R("\240\244") * utf8next * utf8next * utf8next
+patterns.utfbom = utfbom
+patterns.utftype = utftype
+patterns.utfoffset = utfoffset
+
+local utf8char = patterns.utf8one + patterns.utf8two + patterns.utf8three + patterns.utf8four
+local validutf8char = utf8char^0 * endofstring * Cc(true) + Cc(false)
+
+patterns.utf8 = utf8char
+patterns.utf8char = utf8char
+patterns.validutf8 = validutf8char
+patterns.validutf8char = validutf8char
+
+local eol = S("\n\r")
+local spacer = S(" \t\f\v") -- + char(0xc2, 0xa0) if we want utf (cf mail roberto)
+local whitespace = eol + spacer
+local nonspacer = 1 - spacer
+local nonwhitespace = 1 - whitespace
+
+patterns.eol = eol
+patterns.spacer = spacer
+patterns.whitespace = whitespace
+patterns.nonspacer = nonspacer
+patterns.nonwhitespace = nonwhitespace
+
+local stripper = spacer^0 * C((spacer^0 * nonspacer^1)^0) -- from example by roberto
+
+----- collapser = Cs(spacer^0/"" * ((spacer^1 * P(-1) / "") + (spacer^1/" ") + P(1))^0)
+local collapser = Cs(spacer^0/"" * nonspacer^0 * ((spacer^0/" " * nonspacer^1)^0))
+
+patterns.stripper = stripper
+patterns.collapser = collapser
+
+patterns.digit = digit
+patterns.sign = sign
+patterns.cardinal = sign^0 * digit^1
+patterns.integer = sign^0 * digit^1
+patterns.unsigned = digit^0 * P('.') * digit^1
+patterns.float = sign^0 * patterns.unsigned
+patterns.cunsigned = digit^0 * P(',') * digit^1
+patterns.cfloat = sign^0 * patterns.cunsigned
+patterns.number = patterns.float + patterns.integer
+patterns.cnumber = patterns.cfloat + patterns.integer
+patterns.oct = P("0") * R("07")^1
+patterns.octal = patterns.oct
+patterns.HEX = P("0x") * R("09","AF")^1
+patterns.hex = P("0x") * R("09","af")^1
+patterns.hexadecimal = P("0x") * R("09","AF","af")^1
+patterns.lowercase = R("az")
+patterns.uppercase = R("AZ")
+patterns.letter = patterns.lowercase + patterns.uppercase
+patterns.space = space
+patterns.tab = P("\t")
+patterns.spaceortab = patterns.space + patterns.tab
+patterns.newline = newline
+patterns.emptyline = newline^1
+patterns.equal = P("=")
+patterns.comma = P(",")
+patterns.commaspacer = P(",") * spacer^0
+patterns.period = P(".")
+patterns.colon = P(":")
+patterns.semicolon = P(";")
+patterns.underscore = P("_")
+patterns.escaped = escaped
+patterns.squote = squote
+patterns.dquote = dquote
+patterns.nosquote = (escaped + (1-squote))^0
+patterns.nodquote = (escaped + (1-dquote))^0
+patterns.unsingle = (squote/"") * patterns.nosquote * (squote/"") -- will change to C in the middle
+patterns.undouble = (dquote/"") * patterns.nodquote * (dquote/"") -- will change to C in the middle
+patterns.unquoted = patterns.undouble + patterns.unsingle -- more often undouble
+patterns.unspacer = ((patterns.spacer^1)/"")^0
+
+patterns.singlequoted = squote * patterns.nosquote * squote
+patterns.doublequoted = dquote * patterns.nodquote * dquote
+patterns.quoted = patterns.doublequoted + patterns.singlequoted
+
+patterns.propername = R("AZ","az","__") * R("09","AZ","az", "__")^0 * P(-1)
+
+patterns.somecontent = (anything - newline - space)^1 -- (utf8char - newline - space)^1
+patterns.beginline = #(1-newline)
+
+local function anywhere(pattern) --slightly adapted from website
+ return P { P(pattern) + 1 * V(1) }
+end
+
+lpeg.anywhere = anywhere
+
+function lpeg.instringchecker(p)
+ p = anywhere(p)
+ return function(str)
+ return lpegmatch(p,str) and true or false
+ end
+end
+
+function lpeg.splitter(pattern, action)
+ return (((1-P(pattern))^1)/action+1)^0
+end
+
+function lpeg.tsplitter(pattern, action)
+ return Ct((((1-P(pattern))^1)/action+1)^0)
+end
+
+-- probleem: separator can be lpeg and that does not hash too well, but
+-- it's quite okay as the key is then not garbage collected
+
+local splitters_s, splitters_m, splitters_t = { }, { }, { }
+
+local function splitat(separator,single)
+ local splitter = (single and splitters_s[separator]) or splitters_m[separator]
+ if not splitter then
+ separator = P(separator)
+ local other = C((1 - separator)^0)
+ if single then
+ local any = anything
+ splitter = other * (separator * C(any^0) + "") -- ?
+ splitters_s[separator] = splitter
+ else
+ splitter = other * (separator * other)^0
+ splitters_m[separator] = splitter
+ end
+ end
+ return splitter
+end
+
+local function tsplitat(separator)
+ local splitter = splitters_t[separator]
+ if not splitter then
+ splitter = Ct(splitat(separator))
+ splitters_t[separator] = splitter
+ end
+ return splitter
+end
+
+lpeg.splitat = splitat
+lpeg.tsplitat = tsplitat
+
+function string.splitup(str,separator)
+ if not separator then
+ separator = ","
+ end
+ return lpegmatch(splitters_m[separator] or splitat(separator),str)
+end
+
+-- local p = splitat("->",false) print(lpegmatch(p,"oeps->what->more")) -- oeps what more
+-- local p = splitat("->",true) print(lpegmatch(p,"oeps->what->more")) -- oeps what->more
+-- local p = splitat("->",false) print(lpegmatch(p,"oeps")) -- oeps
+-- local p = splitat("->",true) print(lpegmatch(p,"oeps")) -- oeps
+
+local cache = { }
+
+function lpeg.split(separator,str)
+ local c = cache[separator]
+ if not c then
+ c = tsplitat(separator)
+ cache[separator] = c
+ end
+ return lpegmatch(c,str)
+end
+
+function string.split(str,separator)
+ if separator then
+ local c = cache[separator]
+ if not c then
+ c = tsplitat(separator)
+ cache[separator] = c
+ end
+ return lpegmatch(c,str)
+ else
+ return { str }
+ end
+end
+
+local spacing = patterns.spacer^0 * newline -- sort of strip
+local empty = spacing * Cc("")
+local nonempty = Cs((1-spacing)^1) * spacing^-1
+local content = (empty + nonempty)^1
+
+patterns.textline = content
+
+local linesplitter = tsplitat(newline)
+
+patterns.linesplitter = linesplitter
+
+function string.splitlines(str)
+ return lpegmatch(linesplitter,str)
+end
+
+-- lpeg.splitters = cache -- no longer public
+
+local cache = { }
+
+function lpeg.checkedsplit(separator,str)
+ local c = cache[separator]
+ if not c then
+ separator = P(separator)
+ local other = C((1 - separator)^1)
+ c = Ct(separator^0 * other * (separator^1 * other)^0)
+ cache[separator] = c
+ end
+ return lpegmatch(c,str)
+end
+
+function string.checkedsplit(str,separator)
+ local c = cache[separator]
+ if not c then
+ separator = P(separator)
+ local other = C((1 - separator)^1)
+ c = Ct(separator^0 * other * (separator^1 * other)^0)
+ cache[separator] = c
+ end
+ return lpegmatch(c,str)
+end
+
+-- from roberto's site:
+
+local function f2(s) local c1, c2 = byte(s,1,2) return c1 * 64 + c2 - 12416 end
+local function f3(s) local c1, c2, c3 = byte(s,1,3) return (c1 * 64 + c2) * 64 + c3 - 925824 end
+local function f4(s) local c1, c2, c3, c4 = byte(s,1,4) return ((c1 * 64 + c2) * 64 + c3) * 64 + c4 - 63447168 end
+
+local utf8byte = patterns.utf8one/byte + patterns.utf8two/f2 + patterns.utf8three/f3 + patterns.utf8four/f4
+
+patterns.utf8byte = utf8byte
+
+
+
+local cache = { }
+
+function lpeg.stripper(str)
+ if type(str) == "string" then
+ local s = cache[str]
+ if not s then
+ s = Cs(((S(str)^1)/"" + 1)^0)
+ cache[str] = s
+ end
+ return s
+ else
+ return Cs(((str^1)/"" + 1)^0)
+ end
+end
+
+local cache = { }
+
+function lpeg.keeper(str)
+ if type(str) == "string" then
+ local s = cache[str]
+ if not s then
+ s = Cs((((1-S(str))^1)/"" + 1)^0)
+ cache[str] = s
+ end
+ return s
+ else
+ return Cs((((1-str)^1)/"" + 1)^0)
+ end
+end
+
+function lpeg.frontstripper(str) -- or pattern (yet undocumented)
+ return (P(str) + P(true)) * Cs(anything^0)
+end
+
+function lpeg.endstripper(str) -- or pattern (yet undocumented)
+ return Cs((1 - P(str) * endofstring)^0)
+end
+
+-- Just for fun I looked at the used bytecode and
+-- p = (p and p + pp) or pp gets one more (testset).
+
+-- todo: cache when string
+
+function lpeg.replacer(one,two,makefunction,isutf) -- in principle we should sort the keys
+ local pattern
+ local u = isutf and utf8char or 1
+ if type(one) == "table" then
+ local no = #one
+ local p = P(false)
+ if no == 0 then
+ for k, v in next, one do
+ p = p + P(k) / v
+ end
+ pattern = Cs((p + u)^0)
+ elseif no == 1 then
+ local o = one[1]
+ one, two = P(o[1]), o[2]
+ -- pattern = Cs(((1-one)^1 + one/two)^0)
+ pattern = Cs((one/two + u)^0)
+ else
+ for i=1,no do
+ local o = one[i]
+ p = p + P(o[1]) / o[2]
+ end
+ pattern = Cs((p + u)^0)
+ end
+ else
+ pattern = Cs((P(one)/(two or "") + u)^0)
+ end
+ if makefunction then
+ return function(str)
+ return lpegmatch(pattern,str)
+ end
+ else
+ return pattern
+ end
+end
+
+function lpeg.finder(lst,makefunction)
+ local pattern
+ if type(lst) == "table" then
+ pattern = P(false)
+ if #lst == 0 then
+ for k, v in next, lst do
+ pattern = pattern + P(k) -- ignore key, so we can use a replacer table
+ end
+ else
+ for i=1,#lst do
+ pattern = pattern + P(lst[i])
+ end
+ end
+ else
+ pattern = P(lst)
+ end
+ pattern = (1-pattern)^0 * pattern
+ if makefunction then
+ return function(str)
+ return lpegmatch(pattern,str)
+ end
+ else
+ return pattern
+ end
+end
+
+-- print(lpeg.match(lpeg.replacer("e","a"),"test test"))
+-- print(lpeg.match(lpeg.replacer{{"e","a"}},"test test"))
+-- print(lpeg.match(lpeg.replacer({ e = "a", t = "x" }),"test test"))
+
+local splitters_f, splitters_s = { }, { }
+
+function lpeg.firstofsplit(separator) -- always return value
+ local splitter = splitters_f[separator]
+ if not splitter then
+ separator = P(separator)
+ splitter = C((1 - separator)^0)
+ splitters_f[separator] = splitter
+ end
+ return splitter
+end
+
+function lpeg.secondofsplit(separator) -- nil if not split
+ local splitter = splitters_s[separator]
+ if not splitter then
+ separator = P(separator)
+ splitter = (1 - separator)^0 * separator * C(anything^0)
+ splitters_s[separator] = splitter
+ end
+ return splitter
+end
+
+function lpeg.balancer(left,right)
+ left, right = P(left), P(right)
+ return P { left * ((1 - left - right) + V(1))^0 * right }
+end
+
+-- print(1,lpegmatch(lpeg.firstofsplit(":"),"bc:de"))
+-- print(2,lpegmatch(lpeg.firstofsplit(":"),":de")) -- empty
+-- print(3,lpegmatch(lpeg.firstofsplit(":"),"bc"))
+-- print(4,lpegmatch(lpeg.secondofsplit(":"),"bc:de"))
+-- print(5,lpegmatch(lpeg.secondofsplit(":"),"bc:")) -- empty
+-- print(6,lpegmatch(lpeg.secondofsplit(":",""),"bc"))
+-- print(7,lpegmatch(lpeg.secondofsplit(":"),"bc"))
+-- print(9,lpegmatch(lpeg.secondofsplit(":","123"),"bc"))
+
+-- -- slower:
+--
+-- function lpeg.counter(pattern)
+-- local n, pattern = 0, (lpeg.P(pattern)/function() n = n + 1 end + lpeg.anything)^0
+-- return function(str) n = 0 ; lpegmatch(pattern,str) ; return n end
+-- end
+
+local nany = utf8char/""
+
+function lpeg.counter(pattern)
+ pattern = Cs((P(pattern)/" " + nany)^0)
+ return function(str)
+ return #lpegmatch(pattern,str)
+ end
+end
+
+-- utf extensies
+
+local utfcharacters = utf and utf.characters or string.utfcharacters
+local utfgmatch = unicode and unicode.utf8.gmatch
+local utfchar = utf and utf.char or (unicode and unicode.utf8 and unicode.utf8.char)
+
+lpeg.UP = lpeg.P
+
+if utfcharacters then
+
+ function lpeg.US(str)
+ local p = P(false)
+ for uc in utfcharacters(str) do
+ p = p + P(uc)
+ end
+ return p
+ end
+
+
+elseif utfgmatch then
+
+ function lpeg.US(str)
+ local p = P(false)
+ for uc in utfgmatch(str,".") do
+ p = p + P(uc)
+ end
+ return p
+ end
+
+else
+
+ function lpeg.US(str)
+ local p = P(false)
+ local f = function(uc)
+ p = p + P(uc)
+ end
+ lpegmatch((utf8char/f)^0,str)
+ return p
+ end
+
+end
+
+local range = utf8byte * utf8byte + Cc(false) -- utf8byte is already a capture
+
+function lpeg.UR(str,more)
+ local first, last
+ if type(str) == "number" then
+ first = str
+ last = more or first
+ else
+ first, last = lpegmatch(range,str)
+ if not last then
+ return P(str)
+ end
+ end
+ if first == last then
+ return P(str)
+ elseif utfchar and (last - first < 8) then -- a somewhat arbitrary criterium
+ local p = P(false)
+ for i=first,last do
+ p = p + P(utfchar(i))
+ end
+ return p -- nil when invalid range
+ else
+ local f = function(b)
+ return b >= first and b <= last
+ end
+ -- tricky, these nested captures
+ return utf8byte / f -- nil when invalid range
+ end
+end
+
+-- print(lpeg.match(lpeg.Cs((C(lpeg.UR("αω"))/{ ["χ"] = "OEPS" })^0),"αωχαω"))
+
+-- lpeg.print(lpeg.R("ab","cd","gh"))
+-- lpeg.print(lpeg.P("a","b","c"))
+-- lpeg.print(lpeg.S("a","b","c"))
+
+-- print(lpeg.count("äáàa",lpeg.P("á") + lpeg.P("à")))
+-- print(lpeg.count("äáàa",lpeg.UP("áà")))
+-- print(lpeg.count("äáàa",lpeg.US("àá")))
+-- print(lpeg.count("äáàa",lpeg.UR("aá")))
+-- print(lpeg.count("äáàa",lpeg.UR("àá")))
+-- print(lpeg.count("äáàa",lpeg.UR(0x0000,0xFFFF)))
+
+function lpeg.is_lpeg(p)
+ return p and lpegtype(p) == "pattern"
+end
+
+function lpeg.oneof(list,...) -- lpeg.oneof("elseif","else","if","then") -- assume proper order
+ if type(list) ~= "table" then
+ list = { list, ... }
+ end
+ -- table.sort(list) -- longest match first
+ local p = P(list[1])
+ for l=2,#list do
+ p = p + P(list[l])
+ end
+ return p
+end
+
+-- For the moment here, but it might move to utilities. Beware, we need to
+-- have the longest keyword first, so 'aaa' comes beforte 'aa' which is why we
+-- loop back from the end cq. prepend.
+
+local sort = table.sort
+
+local function copyindexed(old)
+ local new = { }
+ for i=1,#old do
+ new[i] = old
+ end
+ return new
+end
+
+local function sortedkeys(tab)
+ local keys, s = { }, 0
+ for key,_ in next, tab do
+ s = s + 1
+ keys[s] = key
+ end
+ sort(keys)
+ return keys
+end
+
+function lpeg.append(list,pp,delayed,checked)
+ local p = pp
+ if #list > 0 then
+ local keys = copyindexed(list)
+ sort(keys)
+ for i=#keys,1,-1 do
+ local k = keys[i]
+ if p then
+ p = P(k) + p
+ else
+ p = P(k)
+ end
+ end
+ elseif delayed then -- hm, it looks like the lpeg parser resolves anyway
+ local keys = sortedkeys(list)
+ if p then
+ for i=1,#keys,1 do
+ local k = keys[i]
+ local v = list[k]
+ p = P(k)/list + p
+ end
+ else
+ for i=1,#keys do
+ local k = keys[i]
+ local v = list[k]
+ if p then
+ p = P(k) + p
+ else
+ p = P(k)
+ end
+ end
+ if p then
+ p = p / list
+ end
+ end
+ elseif checked then
+ -- problem: substitution gives a capture
+ local keys = sortedkeys(list)
+ for i=1,#keys do
+ local k = keys[i]
+ local v = list[k]
+ if p then
+ if k == v then
+ p = P(k) + p
+ else
+ p = P(k)/v + p
+ end
+ else
+ if k == v then
+ p = P(k)
+ else
+ p = P(k)/v
+ end
+ end
+ end
+ else
+ local keys = sortedkeys(list)
+ for i=1,#keys do
+ local k = keys[i]
+ local v = list[k]
+ if p then
+ p = P(k)/v + p
+ else
+ p = P(k)/v
+ end
+ end
+ end
+ return p
+end
+
+-- inspect(lpeg.append({ a = "1", aa = "1", aaa = "1" } ,nil,true))
+-- inspect(lpeg.append({ ["degree celsius"] = "1", celsius = "1", degree = "1" } ,nil,true))
+
+-- function lpeg.exact_match(words,case_insensitive)
+-- local pattern = concat(words)
+-- if case_insensitive then
+-- local pattern = S(upper(characters)) + S(lower(characters))
+-- local list = { }
+-- for i=1,#words do
+-- list[lower(words[i])] = true
+-- end
+-- return Cmt(pattern^1, function(_,i,s)
+-- return list[lower(s)] and i
+-- end)
+-- else
+-- local pattern = S(concat(words))
+-- local list = { }
+-- for i=1,#words do
+-- list[words[i]] = true
+-- end
+-- return Cmt(pattern^1, function(_,i,s)
+-- return list[s] and i
+-- end)
+-- end
+-- end
+
+-- experiment:
+
+local function make(t)
+ local p
+ local keys = sortedkeys(t)
+ for i=1,#keys do
+ local k = keys[i]
+ local v = t[k]
+ if not p then
+ if next(v) then
+ p = P(k) * make(v)
+ else
+ p = P(k)
+ end
+ else
+ if next(v) then
+ p = p + P(k) * make(v)
+ else
+ p = p + P(k)
end
end
- return t
end
+ return p
+end
+function lpeg.utfchartabletopattern(list) -- goes to util-lpg
+ local tree = { }
+ for i=1,#list do
+ local t = tree
+ for c in gmatch(list[i],".") do
+ if not t[c] then
+ t[c] = { }
+ end
+ t = t[c]
+ end
+ end
+ return make(tree)
end
+-- inspect ( lpeg.utfchartabletopattern {
+-- utfchar(0x00A0), -- nbsp
+-- utfchar(0x2000), -- enquad
+-- utfchar(0x2001), -- emquad
+-- utfchar(0x2002), -- enspace
+-- utfchar(0x2003), -- emspace
+-- utfchar(0x2004), -- threeperemspace
+-- utfchar(0x2005), -- fourperemspace
+-- utfchar(0x2006), -- sixperemspace
+-- utfchar(0x2007), -- figurespace
+-- utfchar(0x2008), -- punctuationspace
+-- utfchar(0x2009), -- breakablethinspace
+-- utfchar(0x200A), -- hairspace
+-- utfchar(0x200B), -- zerowidthspace
+-- utfchar(0x202F), -- narrownobreakspace
+-- utfchar(0x205F), -- math thinspace
+-- } )
+
+-- a few handy ones:
+--
+-- faster than find(str,"[\n\r]") when match and # > 7 and always faster when # > 3
+
+patterns.containseol = lpeg.finder(eol) -- (1-eol)^0 * eol
+
+
+end -- of closure
+
+do -- create closure to overcome 200 locals limit
+
+if not modules then modules = { } end modules ['l-functions'] = {
+ version = 1.001,
+ comment = "companion to luat-lib.mkiv",
+ author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
+ copyright = "PRAGMA ADE / ConTeXt Development Team",
+ license = "see context related readme files"
+}
+
+functions = functions or { }
+
+function functions.dummy() end
+
+
+end -- of closure
+
+do -- create closure to overcome 200 locals limit
+
+if not modules then modules = { } end modules ['l-string'] = {
+ version = 1.001,
+ comment = "companion to luat-lib.mkiv",
+ author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
+ copyright = "PRAGMA ADE / ConTeXt Development Team",
+ license = "see context related readme files"
+}
+
+local string = string
+local sub, gmatch, format, char, byte, rep, lower = string.sub, string.gmatch, string.format, string.char, string.byte, string.rep, string.lower
+local lpegmatch, patterns = lpeg.match, lpeg.patterns
+local P, S, C, Ct, Cc, Cs = lpeg.P, lpeg.S, lpeg.C, lpeg.Ct, lpeg.Cc, lpeg.Cs
+
+-- Some functions are already defined in l-lpeg and maybe some from here will
+-- move there (unless we also expose caches).
+
+-- if not string.split then
+--
+-- function string.split(str,pattern)
+-- local t = { }
+-- if #str > 0 then
+-- local n = 1
+-- for s in gmatch(str..pattern,"(.-)"..pattern) do
+-- t[n] = s
+-- n = n + 1
+-- end
+-- end
+-- return t
+-- end
+--
+-- end
+
+-- function string.unquoted(str)
+-- return (gsub(str,"^([\"\'])(.*)%1$","%2")) -- interesting pattern
+-- end
+
+local unquoted = patterns.squote * C(patterns.nosquote) * patterns.squote
+ + patterns.dquote * C(patterns.nodquote) * patterns.dquote
+
function string.unquoted(str)
- return (gsub(str,"^([\"\'])(.*)%1$","%2"))
+ return lpegmatch(unquoted,str) or str
end
+-- print(string.unquoted("test"))
+-- print(string.unquoted([["t\"est"]]))
+-- print(string.unquoted([["t\"est"x]]))
+-- print(string.unquoted("\'test\'"))
+-- print(string.unquoted('"test"'))
+-- print(string.unquoted('"test"'))
function string.quoted(str)
return format("%q",str) -- always "
@@ -118,65 +1034,112 @@ function string.limit(str,n,sentinel) -- not utf proof
end
end
-local space = S(" \t\v\n")
-local nospace = 1 - space
-local stripper = space^0 * C((space^0 * nospace^1)^0) -- roberto's code
+local stripper = patterns.stripper
+local collapser = patterns.collapser
function string.strip(str)
return lpegmatch(stripper,str) or ""
end
+function string.collapsespaces(str)
+ return lpegmatch(collapser,str) or ""
+end
+
+-- function string.is_empty(str)
+-- return not find(str,"%S")
+-- end
+
+local pattern = P(" ")^0 * P(-1)
+
function string.is_empty(str)
- return not find(str,"%S")
+ if str == "" then
+ return true
+ else
+ return lpegmatch(pattern,str) and true or false
+ end
end
-local patterns_escapes = {
- ["%"] = "%%",
- ["."] = "%.",
- ["+"] = "%+", ["-"] = "%-", ["*"] = "%*",
- ["["] = "%[", ["]"] = "%]",
- ["("] = "%(", [")"] = "%)",
- -- ["{"] = "%{", ["}"] = "%}"
- -- ["^"] = "%^", ["$"] = "%$",
-}
-local simple_escapes = {
- ["-"] = "%-",
- ["."] = "%.",
- ["?"] = ".",
- ["*"] = ".*",
-}
+-- if not string.escapedpattern then
+--
+-- local patterns_escapes = {
+-- ["%"] = "%%",
+-- ["."] = "%.",
+-- ["+"] = "%+", ["-"] = "%-", ["*"] = "%*",
+-- ["["] = "%[", ["]"] = "%]",
+-- ["("] = "%(", [")"] = "%)",
+-- -- ["{"] = "%{", ["}"] = "%}"
+-- -- ["^"] = "%^", ["$"] = "%$",
+-- }
+--
+-- local simple_escapes = {
+-- ["-"] = "%-",
+-- ["."] = "%.",
+-- ["?"] = ".",
+-- ["*"] = ".*",
+-- }
+--
+-- function string.escapedpattern(str,simple)
+-- return (gsub(str,".",simple and simple_escapes or patterns_escapes))
+-- end
+--
+-- function string.topattern(str,lowercase,strict)
+-- if str == "" then
+-- return ".*"
+-- else
+-- str = gsub(str,".",simple_escapes)
+-- if lowercase then
+-- str = lower(str)
+-- end
+-- if strict then
+-- return "^" .. str .. "$"
+-- else
+-- return str
+-- end
+-- end
+-- end
+--
+-- end
+
+--- needs checking
+
+local anything = patterns.anything
+local allescapes = Cc("%") * S(".-+%?()[]*") -- also {} and ^$ ?
+local someescapes = Cc("%") * S(".-+%()[]") -- also {} and ^$ ?
+local matchescapes = Cc(".") * S("*?") -- wildcard and single match
+
+local pattern_a = Cs ( ( allescapes + anything )^0 )
+local pattern_b = Cs ( ( someescapes + matchescapes + anything )^0 )
+local pattern_c = Cs ( Cc("^") * ( someescapes + matchescapes + anything )^0 * Cc("$") )
function string.escapedpattern(str,simple)
- return (gsub(str,".",simple and simple_escapes or patterns_escapes))
+ return lpegmatch(simple and pattern_b or pattern_a,str)
end
function string.topattern(str,lowercase,strict)
if str == "" then
return ".*"
+ elseif strict then
+ str = lpegmatch(pattern_c,str)
else
- str = gsub(str,".",simple_escapes)
- if lowercase then
- str = lower(str)
- end
- if strict then
- return "^" .. str .. "$"
- else
- return str
- end
+ str = lpegmatch(pattern_b,str)
+ end
+ if lowercase then
+ return lower(str)
+ else
+ return str
end
end
+-- print(string.escapedpattern("12+34*.tex",false))
+-- print(string.escapedpattern("12+34*.tex",true))
+-- print(string.topattern ("12+34*.tex",false,false))
+-- print(string.topattern ("12+34*.tex",false,true))
function string.valid(str,default)
return (type(str) == "string" and str ~= "" and str) or default or nil
end
--- obsolete names:
-
-string.quote = string.quoted
-string.unquote = string.unquoted
-
-- handy fallback
string.itself = function(s) return s end
@@ -189,6 +1152,19 @@ function string.totable(str)
return lpegmatch(pattern,str)
end
+-- handy from within tex:
+
+local replacer = lpeg.replacer("@","%%") -- Watch the escaped % in lpeg!
+
+function string.tformat(fmt,...)
+ return format(lpegmatch(replacer,fmt),...)
+end
+
+-- obsolete names:
+
+string.quote = string.quoted
+string.unquote = string.unquoted
+
end -- of closure
@@ -202,68 +1178,23 @@ if not modules then modules = { } end modules ['l-table'] = {
license = "see context related readme files"
}
-local type, next, tostring, tonumber, ipairs = type, next, tostring, tonumber, ipairs
+local type, next, tostring, tonumber, ipairs, select = type, next, tostring, tonumber, ipairs, select
local table, string = table, string
local concat, sort, insert, remove = table.concat, table.sort, table.insert, table.remove
-local format, find, gsub, lower, dump, match = string.format, string.find, string.gsub, string.lower, string.dump, string.match
+local format, lower, dump = string.format, string.lower, string.dump
local getmetatable, setmetatable = getmetatable, setmetatable
local getinfo = debug.getinfo
-
--- Starting with version 5.2 Lua no longer provide ipairs, which makes
--- sense. As we already used the for loop and # in most places the
--- impact on ConTeXt was not that large; the remaining ipairs already
--- have been replaced. In a similar fashion we also hardly used pairs.
---
--- Hm, actually ipairs was retained, but we no longer use it anyway.
---
--- Just in case, we provide the fallbacks as discussed in Programming
--- in Lua (http://www.lua.org/pil/7.3.html):
-
-if not ipairs then
-
- -- for k, v in ipairs(t) do ... end
- -- for k=1,#t do local v = t[k] ... end
-
- local function iterate(a,i)
- i = i + 1
- local v = a[i]
- if v ~= nil then
- return i, v --, nil
- end
- end
-
- function ipairs(a)
- return iterate, a, 0
- end
-
-end
-
-if not pairs then
-
- -- for k, v in pairs(t) do ... end
- -- for k, v in next, t do ... end
-
- function pairs(t)
- return next, t -- , nil
- end
-
-end
-
--- Also, unpack has been moved to the table table, and for compatiility
--- reasons we provide both now.
-
-if not table.unpack then
- table.unpack = _G.unpack
-elseif not unpack then
- _G.unpack = table.unpack
-end
+local lpegmatch, patterns = lpeg.match, lpeg.patterns
+local floor = math.floor
-- extra functions, some might go (when not used)
+local stripper = patterns.stripper
+
function table.strip(tab)
local lst, l = { }, 0
for i=1,#tab do
- local s = gsub(tab[i],"^%s*(.-)%s*$","%1")
+ local s = lpegmatch(stripper,tab[i]) or ""
if s == "" then
-- skip this one
else
@@ -372,7 +1303,7 @@ local function sortedhash(t)
end
table.sortedhash = sortedhash
-table.sortedpairs = sortedhash
+table.sortedpairs = sortedhash -- obsolete
function table.append(t,list)
local n = #t
@@ -396,31 +1327,63 @@ function table.prepend(t, list)
return t
end
+-- function table.merge(t, ...) -- first one is target
+-- t = t or { }
+-- local lst = { ... }
+-- for i=1,#lst do
+-- for k, v in next, lst[i] do
+-- t[k] = v
+-- end
+-- end
+-- return t
+-- end
+
function table.merge(t, ...) -- first one is target
t = t or { }
- local lst = { ... }
- for i=1,#lst do
- for k, v in next, lst[i] do
+ for i=1,select("#",...) do
+ for k, v in next, (select(i,...)) do
t[k] = v
end
end
return t
end
+-- function table.merged(...)
+-- local tmp, lst = { }, { ... }
+-- for i=1,#lst do
+-- for k, v in next, lst[i] do
+-- tmp[k] = v
+-- end
+-- end
+-- return tmp
+-- end
+
function table.merged(...)
- local tmp, lst = { }, { ... }
- for i=1,#lst do
- for k, v in next, lst[i] do
- tmp[k] = v
+ local t = { }
+ for i=1,select("#",...) do
+ for k, v in next, (select(i,...)) do
+ t[k] = v
end
end
- return tmp
+ return t
end
+-- function table.imerge(t, ...)
+-- local lst, nt = { ... }, #t
+-- for i=1,#lst do
+-- local nst = lst[i]
+-- for j=1,#nst do
+-- nt = nt + 1
+-- t[nt] = nst[j]
+-- end
+-- end
+-- return t
+-- end
+
function table.imerge(t, ...)
- local lst, nt = { ... }, #t
- for i=1,#lst do
- local nst = lst[i]
+ local nt = #t
+ for i=1,select("#",...) do
+ local nst = select(i,...)
for j=1,#nst do
nt = nt + 1
t[nt] = nst[j]
@@ -429,10 +1392,22 @@ function table.imerge(t, ...)
return t
end
+-- function table.imerged(...)
+-- local tmp, ntmp, lst = { }, 0, {...}
+-- for i=1,#lst do
+-- local nst = lst[i]
+-- for j=1,#nst do
+-- ntmp = ntmp + 1
+-- tmp[ntmp] = nst[j]
+-- end
+-- end
+-- return tmp
+-- end
+
function table.imerged(...)
- local tmp, ntmp, lst = { }, 0, {...}
- for i=1,#lst do
- local nst = lst[i]
+ local tmp, ntmp = { }, 0
+ for i=1,select("#",...) do
+ local nst = select(i,...)
for j=1,#nst do
ntmp = ntmp + 1
tmp[ntmp] = nst[j]
@@ -444,7 +1419,7 @@ end
local function fastcopy(old,metatabletoo) -- fast one
if old then
local new = { }
- for k,v in next, old do
+ for k, v in next, old do
if type(v) == "table" then
new[k] = fastcopy(v,metatabletoo) -- was just table.copy
else
@@ -498,7 +1473,7 @@ end
table.fastcopy = fastcopy
table.copy = copy
-function table.derive(parent)
+function table.derive(parent) -- for the moment not public
local child = { }
if parent then
setmetatable(child,{ __index = parent })
@@ -579,6 +1554,13 @@ end
-- problem: there no good number_to_string converter with the best resolution
+-- probably using .. is faster than format
+-- maybe split in a few cases (yes/no hexify)
+
+-- todo: %g faster on numbers than %s
+
+local propername = patterns.propername -- was find(name,"^%a[%w%_]*$")
+
local function dummy() end
local function do_serialize(root,name,depth,level,indexed)
@@ -588,14 +1570,14 @@ local function do_serialize(root,name,depth,level,indexed)
handle(format("%s{",depth))
else
local tn = type(name)
- if tn == "number" then -- or find(k,"^%d+$") then
+ if tn == "number" then
if hexify then
handle(format("%s[0x%04X]={",depth,name))
else
handle(format("%s[%s]={",depth,name))
end
elseif tn == "string" then
- if noquotes and not reserved[name] and find(name,"^%a[%w%_]*$") then
+ if noquotes and not reserved[name] and lpegmatch(propername,name) then
handle(format("%s%s={",depth,name))
else
handle(format("%s[%q]={",depth,name))
@@ -621,7 +1603,6 @@ local function do_serialize(root,name,depth,level,indexed)
if compact then
last = #root
for k=1,last do
--- if not root[k] then
if root[k] == nil then
last = k - 1
break
@@ -667,7 +1648,7 @@ local function do_serialize(root,name,depth,level,indexed)
handle(format("%s %s,",depth,tostring(v)))
elseif t == "function" then
if functions then
- handle(format('%s loadstring(%q),',depth,dump(v)))
+ handle(format('%s load(%q),',depth,dump(v)))
else
handle(format('%s "function",',depth))
end
@@ -679,7 +1660,7 @@ local function do_serialize(root,name,depth,level,indexed)
handle(format("%s __p__=nil,",depth))
end
elseif t == "number" then
- if tk == "number" then -- or find(k,"^%d+$") then
+ if tk == "number" then
if hexify then
handle(format("%s [0x%04X]=0x%04X,",depth,k,v))
else
@@ -691,7 +1672,7 @@ local function do_serialize(root,name,depth,level,indexed)
else
handle(format("%s [%s]=%s,",depth,tostring(k),v)) -- %.99g
end
- elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then
+ elseif noquotes and not reserved[k] and lpegmatch(propername,k) then
if hexify then
handle(format("%s %s=0x%04X,",depth,k,v))
else
@@ -706,7 +1687,7 @@ local function do_serialize(root,name,depth,level,indexed)
end
elseif t == "string" then
if reduce and tonumber(v) then
- if tk == "number" then -- or find(k,"^%d+$") then
+ if tk == "number" then
if hexify then
handle(format("%s [0x%04X]=%s,",depth,k,v))
else
@@ -714,13 +1695,13 @@ local function do_serialize(root,name,depth,level,indexed)
end
elseif tk == "boolean" then
handle(format("%s [%s]=%s,",depth,tostring(k),v))
- elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then
+ elseif noquotes and not reserved[k] and lpegmatch(propername,k) then
handle(format("%s %s=%s,",depth,k,v))
else
handle(format("%s [%q]=%s,",depth,k,v))
end
else
- if tk == "number" then -- or find(k,"^%d+$") then
+ if tk == "number" then
if hexify then
handle(format("%s [0x%04X]=%q,",depth,k,v))
else
@@ -728,7 +1709,7 @@ local function do_serialize(root,name,depth,level,indexed)
end
elseif tk == "boolean" then
handle(format("%s [%s]=%q,",depth,tostring(k),v))
- elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then
+ elseif noquotes and not reserved[k] and lpegmatch(propername,k) then
handle(format("%s %s=%q,",depth,k,v))
else
handle(format("%s [%q]=%q,",depth,k,v))
@@ -736,7 +1717,7 @@ local function do_serialize(root,name,depth,level,indexed)
end
elseif t == "table" then
if not next(v) then
- if tk == "number" then -- or find(k,"^%d+$") then
+ if tk == "number" then
if hexify then
handle(format("%s [0x%04X]={},",depth,k))
else
@@ -744,7 +1725,7 @@ local function do_serialize(root,name,depth,level,indexed)
end
elseif tk == "boolean" then
handle(format("%s [%s]={},",depth,tostring(k)))
- elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then
+ elseif noquotes and not reserved[k] and lpegmatch(propername,k) then
handle(format("%s %s={},",depth,k))
else
handle(format("%s [%q]={},",depth,k))
@@ -752,15 +1733,15 @@ local function do_serialize(root,name,depth,level,indexed)
elseif inline then
local st = simple_table(v)
if st then
- if tk == "number" then -- or find(k,"^%d+$") then
+ if tk == "number" then
if hexify then
handle(format("%s [0x%04X]={ %s },",depth,k,concat(st,", ")))
else
handle(format("%s [%s]={ %s },",depth,k,concat(st,", ")))
end
- elseif tk == "boolean" then -- or find(k,"^%d+$") then
+ elseif tk == "boolean" then
handle(format("%s [%s]={ %s },",depth,tostring(k),concat(st,", ")))
- elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then
+ elseif noquotes and not reserved[k] and lpegmatch(propername,k) then
handle(format("%s %s={ %s },",depth,k,concat(st,", ")))
else
handle(format("%s [%q]={ %s },",depth,k,concat(st,", ")))
@@ -772,15 +1753,15 @@ local function do_serialize(root,name,depth,level,indexed)
do_serialize(v,k,depth,level+1)
end
elseif t == "boolean" then
- if tk == "number" then -- or find(k,"^%d+$") then
+ if tk == "number" then
if hexify then
handle(format("%s [0x%04X]=%s,",depth,k,tostring(v)))
else
handle(format("%s [%s]=%s,",depth,k,tostring(v)))
end
- elseif tk == "boolean" then -- or find(k,"^%d+$") then
+ elseif tk == "boolean" then
handle(format("%s [%s]=%s,",depth,tostring(k),tostring(v)))
- elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then
+ elseif noquotes and not reserved[k] and lpegmatch(propername,k) then
handle(format("%s %s=%s,",depth,k,tostring(v)))
else
handle(format("%s [%q]=%s,",depth,k,tostring(v)))
@@ -789,30 +1770,30 @@ local function do_serialize(root,name,depth,level,indexed)
if functions then
local f = getinfo(v).what == "C" and dump(dummy) or dump(v)
-- local f = getinfo(v).what == "C" and dump(function(...) return v(...) end) or dump(v)
- if tk == "number" then -- or find(k,"^%d+$") then
+ if tk == "number" then
if hexify then
- handle(format("%s [0x%04X]=loadstring(%q),",depth,k,f))
+ handle(format("%s [0x%04X]=load(%q),",depth,k,f))
else
- handle(format("%s [%s]=loadstring(%q),",depth,k,f))
+ handle(format("%s [%s]=load(%q),",depth,k,f))
end
elseif tk == "boolean" then
- handle(format("%s [%s]=loadstring(%q),",depth,tostring(k),f))
- elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then
- handle(format("%s %s=loadstring(%q),",depth,k,f))
+ handle(format("%s [%s]=load(%q),",depth,tostring(k),f))
+ elseif noquotes and not reserved[k] and lpegmatch(propername,k) then
+ handle(format("%s %s=load(%q),",depth,k,f))
else
- handle(format("%s [%q]=loadstring(%q),",depth,k,f))
+ handle(format("%s [%q]=load(%q),",depth,k,f))
end
end
else
- if tk == "number" then -- or find(k,"^%d+$") then
+ if tk == "number" then
if hexify then
handle(format("%s [0x%04X]=%q,",depth,k,tostring(v)))
else
handle(format("%s [%s]=%q,",depth,k,tostring(v)))
end
- elseif tk == "boolean" then -- or find(k,"^%d+$") then
+ elseif tk == "boolean" then
handle(format("%s [%s]=%q,",depth,tostring(k),tostring(v)))
- elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then
+ elseif noquotes and not reserved[k] and lpegmatch(propername,k) then
handle(format("%s %s=%q,",depth,k,tostring(v)))
else
handle(format("%s [%q]=%q,",depth,k,tostring(v)))
@@ -892,6 +1873,14 @@ local function serialize(_handle,root,name,specification) -- handle wins
handle("}")
end
+-- name:
+--
+-- true : return { }
+-- false : { }
+-- nil : t = { }
+-- string : string = { }
+-- "return" : return { }
+-- number : [number] = { }
function table.serialize(root,name,specification)
local t, n = { }, 0
@@ -980,7 +1969,7 @@ table.flattened = flattened
local function unnest(t,f) -- only used in mk, for old times sake
if not f then -- and only relevant for token lists
- f = { }
+ f = { } -- this one can become obsolete
end
for i=1,#t do
local v = t[i]
@@ -1009,7 +1998,7 @@ local function are_equal(a,b,n,m) -- indexed
local ai, bi = a[i], b[i]
if ai==bi then
-- same
- elseif type(ai)=="table" and type(bi)=="table" then
+ elseif type(ai) == "table" and type(bi) == "table" then
if not are_equal(ai,bi) then
return false
end
@@ -1044,10 +2033,10 @@ table.are_equal = are_equal
-- maybe also make a combined one
-function table.compact(t)
+function table.compact(t) -- remove empty tables, assumes subtables
if t then
- for k,v in next, t do
- if not next(v) then
+ for k, v in next, t do
+ if not next(v) then -- no type checking
t[k] = nil
end
end
@@ -1086,7 +2075,7 @@ function table.swapped(t,s) -- hash
return n
end
-function table.mirror(t) -- hash
+function table.mirrored(t) -- hash
local n = { }
for k, v in next, t do
n[v] = k
@@ -1109,6 +2098,17 @@ function table.reversed(t)
end
end
+function table.reverse(t)
+ if t then
+ local n = #t
+ for i=1,floor(n/2) do
+ local j = n - i + 1
+ t[i], t[j] = t[j], t[i]
+ end
+ return t
+ end
+end
+
function table.sequenced(t,sep) -- hash only
if t then
local s, n = { }, 0
@@ -1196,848 +2196,6 @@ end -- of closure
do -- create closure to overcome 200 locals limit
-if not modules then modules = { } end modules ['l-lpeg'] = {
- version = 1.001,
- comment = "companion to luat-lib.mkiv",
- author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
- copyright = "PRAGMA ADE / ConTeXt Development Team",
- license = "see context related readme files"
-}
-
-
--- a new lpeg fails on a #(1-P(":")) test and really needs a + P(-1)
-
-local lpeg = require("lpeg")
-
--- tracing (only used when we encounter a problem in integration of lpeg in luatex)
-
--- some code will move to unicode and string
-
-local report = texio and texio.write_nl or print
-
--- local lpmatch = lpeg.match
--- local lpprint = lpeg.print
--- local lpp = lpeg.P
--- local lpr = lpeg.R
--- local lps = lpeg.S
--- local lpc = lpeg.C
--- local lpb = lpeg.B
--- local lpv = lpeg.V
--- local lpcf = lpeg.Cf
--- local lpcb = lpeg.Cb
--- local lpcg = lpeg.Cg
--- local lpct = lpeg.Ct
--- local lpcs = lpeg.Cs
--- local lpcc = lpeg.Cc
--- local lpcmt = lpeg.Cmt
--- local lpcarg = lpeg.Carg
-
--- function lpeg.match(l,...) report("LPEG MATCH") lpprint(l) return lpmatch(l,...) end
-
--- function lpeg.P (l) local p = lpp (l) report("LPEG P =") lpprint(l) return p end
--- function lpeg.R (l) local p = lpr (l) report("LPEG R =") lpprint(l) return p end
--- function lpeg.S (l) local p = lps (l) report("LPEG S =") lpprint(l) return p end
--- function lpeg.C (l) local p = lpc (l) report("LPEG C =") lpprint(l) return p end
--- function lpeg.B (l) local p = lpb (l) report("LPEG B =") lpprint(l) return p end
--- function lpeg.V (l) local p = lpv (l) report("LPEG V =") lpprint(l) return p end
--- function lpeg.Cf (l) local p = lpcf (l) report("LPEG Cf =") lpprint(l) return p end
--- function lpeg.Cb (l) local p = lpcb (l) report("LPEG Cb =") lpprint(l) return p end
--- function lpeg.Cg (l) local p = lpcg (l) report("LPEG Cg =") lpprint(l) return p end
--- function lpeg.Ct (l) local p = lpct (l) report("LPEG Ct =") lpprint(l) return p end
--- function lpeg.Cs (l) local p = lpcs (l) report("LPEG Cs =") lpprint(l) return p end
--- function lpeg.Cc (l) local p = lpcc (l) report("LPEG Cc =") lpprint(l) return p end
--- function lpeg.Cmt (l) local p = lpcmt (l) report("LPEG Cmt =") lpprint(l) return p end
--- function lpeg.Carg (l) local p = lpcarg(l) report("LPEG Carg =") lpprint(l) return p end
-
-local type, next = type, next
-local byte, char, gmatch, format = string.byte, string.char, string.gmatch, string.format
-
--- Beware, we predefine a bunch of patterns here and one reason for doing so
--- is that we get consistent behaviour in some of the visualizers.
-
-lpeg.patterns = lpeg.patterns or { } -- so that we can share
-local patterns = lpeg.patterns
-
-local P, R, S, V, Ct, C, Cs, Cc, Cp = lpeg.P, lpeg.R, lpeg.S, lpeg.V, lpeg.Ct, lpeg.C, lpeg.Cs, lpeg.Cc, lpeg.Cp
-local lpegtype, lpegmatch = lpeg.type, lpeg.match
-
-local utfcharacters = string.utfcharacters
-local utfgmatch = unicode and unicode.utf8.gmatch
-
-local anything = P(1)
-local endofstring = P(-1)
-local alwaysmatched = P(true)
-
-patterns.anything = anything
-patterns.endofstring = endofstring
-patterns.beginofstring = alwaysmatched
-patterns.alwaysmatched = alwaysmatched
-
-local digit, sign = R('09'), S('+-')
-local cr, lf, crlf = P("\r"), P("\n"), P("\r\n")
-local newline = crlf + S("\r\n") -- cr + lf
-local escaped = P("\\") * anything
-local squote = P("'")
-local dquote = P('"')
-local space = P(" ")
-
-local utfbom_32_be = P('\000\000\254\255')
-local utfbom_32_le = P('\255\254\000\000')
-local utfbom_16_be = P('\255\254')
-local utfbom_16_le = P('\254\255')
-local utfbom_8 = P('\239\187\191')
-local utfbom = utfbom_32_be + utfbom_32_le
- + utfbom_16_be + utfbom_16_le
- + utfbom_8
-local utftype = utfbom_32_be / "utf-32-be" + utfbom_32_le / "utf-32-le"
- + utfbom_16_be / "utf-16-be" + utfbom_16_le / "utf-16-le"
- + utfbom_8 / "utf-8" + alwaysmatched / "unknown"
-
-local utf8next = R("\128\191")
-
-patterns.utf8one = R("\000\127")
-patterns.utf8two = R("\194\223") * utf8next
-patterns.utf8three = R("\224\239") * utf8next * utf8next
-patterns.utf8four = R("\240\244") * utf8next * utf8next * utf8next
-patterns.utfbom = utfbom
-patterns.utftype = utftype
-
-local utf8char = patterns.utf8one + patterns.utf8two + patterns.utf8three + patterns.utf8four
-local validutf8char = utf8char^0 * endofstring * Cc(true) + Cc(false)
-
-patterns.utf8 = utf8char
-patterns.utf8char = utf8char
-patterns.validutf8 = validutf8char
-patterns.validutf8char = validutf8char
-
-local eol = S("\n\r")
-local spacer = S(" \t\f\v") -- + char(0xc2, 0xa0) if we want utf (cf mail roberto)
-local whitespace = eol + spacer
-
-patterns.digit = digit
-patterns.sign = sign
-patterns.cardinal = sign^0 * digit^1
-patterns.integer = sign^0 * digit^1
-patterns.unsigned = digit^0 * P('.') * digit^1
-patterns.float = sign^0 * patterns.unsigned
-patterns.cunsigned = digit^0 * P(',') * digit^1
-patterns.cfloat = sign^0 * patterns.cunsigned
-patterns.number = patterns.float + patterns.integer
-patterns.cnumber = patterns.cfloat + patterns.integer
-patterns.oct = P("0") * R("07")^1
-patterns.octal = patterns.oct
-patterns.HEX = P("0x") * R("09","AF")^1
-patterns.hex = P("0x") * R("09","af")^1
-patterns.hexadecimal = P("0x") * R("09","AF","af")^1
-patterns.lowercase = R("az")
-patterns.uppercase = R("AZ")
-patterns.letter = patterns.lowercase + patterns.uppercase
-patterns.space = space
-patterns.tab = P("\t")
-patterns.spaceortab = patterns.space + patterns.tab
-patterns.eol = eol
-patterns.spacer = spacer
-patterns.whitespace = whitespace
-patterns.newline = newline
-patterns.emptyline = newline^1
-patterns.nonspacer = 1 - spacer
-patterns.nonwhitespace = 1 - whitespace
-patterns.equal = P("=")
-patterns.comma = P(",")
-patterns.commaspacer = P(",") * spacer^0
-patterns.period = P(".")
-patterns.colon = P(":")
-patterns.semicolon = P(";")
-patterns.underscore = P("_")
-patterns.escaped = escaped
-patterns.squote = squote
-patterns.dquote = dquote
-patterns.nosquote = (escaped + (1-squote))^0
-patterns.nodquote = (escaped + (1-dquote))^0
-patterns.unsingle = (squote/"") * patterns.nosquote * (squote/"")
-patterns.undouble = (dquote/"") * patterns.nodquote * (dquote/"")
-patterns.unquoted = patterns.undouble + patterns.unsingle -- more often undouble
-patterns.unspacer = ((patterns.spacer^1)/"")^0
-
-patterns.singlequoted = squote * patterns.nosquote * squote
-patterns.doublequoted = dquote * patterns.nodquote * dquote
-patterns.quoted = patterns.doublequoted + patterns.singlequoted
-
-patterns.somecontent = (anything - newline - space)^1 -- (utf8char - newline - space)^1
-patterns.beginline = #(1-newline)
-
--- print(string.unquoted("test"))
--- print(string.unquoted([["t\"est"]]))
--- print(string.unquoted([["t\"est"x]]))
--- print(string.unquoted("\'test\'"))
--- print(string.unquoted('"test"'))
--- print(string.unquoted('"test"'))
-
-local function anywhere(pattern) --slightly adapted from website
- return P { P(pattern) + 1 * V(1) }
-end
-
-lpeg.anywhere = anywhere
-
-function lpeg.instringchecker(p)
- p = anywhere(p)
- return function(str)
- return lpegmatch(p,str) and true or false
- end
-end
-
-function lpeg.splitter(pattern, action)
- return (((1-P(pattern))^1)/action+1)^0
-end
-
-function lpeg.tsplitter(pattern, action)
- return Ct((((1-P(pattern))^1)/action+1)^0)
-end
-
--- probleem: separator can be lpeg and that does not hash too well, but
--- it's quite okay as the key is then not garbage collected
-
-local splitters_s, splitters_m, splitters_t = { }, { }, { }
-
-local function splitat(separator,single)
- local splitter = (single and splitters_s[separator]) or splitters_m[separator]
- if not splitter then
- separator = P(separator)
- local other = C((1 - separator)^0)
- if single then
- local any = anything
- splitter = other * (separator * C(any^0) + "") -- ?
- splitters_s[separator] = splitter
- else
- splitter = other * (separator * other)^0
- splitters_m[separator] = splitter
- end
- end
- return splitter
-end
-
-local function tsplitat(separator)
- local splitter = splitters_t[separator]
- if not splitter then
- splitter = Ct(splitat(separator))
- splitters_t[separator] = splitter
- end
- return splitter
-end
-
-lpeg.splitat = splitat
-lpeg.tsplitat = tsplitat
-
-function string.splitup(str,separator)
- if not separator then
- separator = ","
- end
- return lpegmatch(splitters_m[separator] or splitat(separator),str)
-end
-
-
-local cache = { }
-
-function lpeg.split(separator,str)
- local c = cache[separator]
- if not c then
- c = tsplitat(separator)
- cache[separator] = c
- end
- return lpegmatch(c,str)
-end
-
-function string.split(str,separator)
- if separator then
- local c = cache[separator]
- if not c then
- c = tsplitat(separator)
- cache[separator] = c
- end
- return lpegmatch(c,str)
- else
- return { str }
- end
-end
-
-local spacing = patterns.spacer^0 * newline -- sort of strip
-local empty = spacing * Cc("")
-local nonempty = Cs((1-spacing)^1) * spacing^-1
-local content = (empty + nonempty)^1
-
-patterns.textline = content
-
-
-local linesplitter = tsplitat(newline)
-
-patterns.linesplitter = linesplitter
-
-function string.splitlines(str)
- return lpegmatch(linesplitter,str)
-end
-
-local utflinesplitter = utfbom^-1 * tsplitat(newline)
-
-patterns.utflinesplitter = utflinesplitter
-
-function string.utfsplitlines(str)
- return lpegmatch(utflinesplitter,str or "")
-end
-
-local utfcharsplitter_ows = utfbom^-1 * Ct(C(utf8char)^0)
-local utfcharsplitter_iws = utfbom^-1 * Ct((whitespace^1 + C(utf8char))^0)
-
-function string.utfsplit(str,ignorewhitespace) -- new
- if ignorewhitespace then
- return lpegmatch(utfcharsplitter_iws,str or "")
- else
- return lpegmatch(utfcharsplitter_ows,str or "")
- end
-end
-
--- inspect(string.utfsplit("a b c d"))
--- inspect(string.utfsplit("a b c d",true))
-
--- -- alternative 1: 0.77
---
--- local utfcharcounter = utfbom^-1 * Cs((utf8char/'!')^0)
---
--- function string.utflength(str)
--- return #lpegmatch(utfcharcounter,str or "")
--- end
---
--- -- alternative 2: 1.70
---
--- local n = 0
---
--- local utfcharcounter = utfbom^-1 * (utf8char/function() n = n + 1 end)^0 -- slow
---
--- function string.utflength(str)
--- n = 0
--- lpegmatch(utfcharcounter,str or "")
--- return n
--- end
---
--- -- alternative 3: 0.24 (native unicode.utf8.len: 0.047)
-
-local n = 0
-
-local utfcharcounter = utfbom^-1 * Cs ( (
- Cp() * (lpeg.patterns.utf8one )^1 * Cp() / function(f,t) n = n + t - f end
- + Cp() * (lpeg.patterns.utf8two )^1 * Cp() / function(f,t) n = n + (t - f)/2 end
- + Cp() * (lpeg.patterns.utf8three)^1 * Cp() / function(f,t) n = n + (t - f)/3 end
- + Cp() * (lpeg.patterns.utf8four )^1 * Cp() / function(f,t) n = n + (t - f)/4 end
-)^0 )
-
-function string.utflength(str)
- n = 0
- lpegmatch(utfcharcounter,str or "")
- return n
-end
-
-
-local cache = { }
-
-function lpeg.checkedsplit(separator,str)
- local c = cache[separator]
- if not c then
- separator = P(separator)
- local other = C((1 - separator)^1)
- c = Ct(separator^0 * other * (separator^1 * other)^0)
- cache[separator] = c
- end
- return lpegmatch(c,str)
-end
-
-function string.checkedsplit(str,separator)
- local c = cache[separator]
- if not c then
- separator = P(separator)
- local other = C((1 - separator)^1)
- c = Ct(separator^0 * other * (separator^1 * other)^0)
- cache[separator] = c
- end
- return lpegmatch(c,str)
-end
-
-
-local function f2(s) local c1, c2 = byte(s,1,2) return c1 * 64 + c2 - 12416 end
-local function f3(s) local c1, c2, c3 = byte(s,1,3) return (c1 * 64 + c2) * 64 + c3 - 925824 end
-local function f4(s) local c1, c2, c3, c4 = byte(s,1,4) return ((c1 * 64 + c2) * 64 + c3) * 64 + c4 - 63447168 end
-
-local utf8byte = patterns.utf8one/byte + patterns.utf8two/f2 + patterns.utf8three/f3 + patterns.utf8four/f4
-
-patterns.utf8byte = utf8byte
-
-
-
-local cache = { }
-
-function lpeg.stripper(str)
- if type(str) == "string" then
- local s = cache[str]
- if not s then
- s = Cs(((S(str)^1)/"" + 1)^0)
- cache[str] = s
- end
- return s
- else
- return Cs(((str^1)/"" + 1)^0)
- end
-end
-
-local cache = { }
-
-function lpeg.keeper(str)
- if type(str) == "string" then
- local s = cache[str]
- if not s then
- s = Cs((((1-S(str))^1)/"" + 1)^0)
- cache[str] = s
- end
- return s
- else
- return Cs((((1-str)^1)/"" + 1)^0)
- end
-end
-
-function lpeg.frontstripper(str) -- or pattern (yet undocumented)
- return (P(str) + P(true)) * Cs(anything^0)
-end
-
-function lpeg.endstripper(str) -- or pattern (yet undocumented)
- return Cs((1 - P(str) * endofstring)^0)
-end
-
--- Just for fun I looked at the used bytecode and
--- p = (p and p + pp) or pp gets one more (testset).
-
-function lpeg.replacer(one,two,makefunction)
- local pattern
- if type(one) == "table" then
- local no = #one
- local p = P(false)
- if no == 0 then
- for k, v in next, one do
- p = p + P(k) / v
- end
- pattern = Cs((p + 1)^0)
- elseif no == 1 then
- local o = one[1]
- one, two = P(o[1]), o[2]
- -- pattern = Cs(((1-one)^1 + one/two)^0)
- pattern = Cs((one/two + 1)^0)
- else
- for i=1,no do
- local o = one[i]
- p = p + P(o[1]) / o[2]
- end
- pattern = Cs((p + 1)^0)
- end
- else
- one = P(one)
- two = two or ""
- -- pattern = Cs(((1-one)^1 + one/two)^0)
- pattern = Cs((one/two +1)^0)
- end
- if makefunction then
- return function(str)
- return lpegmatch(pattern,str)
- end
- else
- return pattern
- end
-end
-
-function lpeg.finder(lst,makefunction)
- local pattern
- if type(lst) == "table" then
- local p = P(false)
- for i=1,#lst do
- p = p + P(lst[i])
- end
- pattern = (p + 1)^0
- else
- pattern = (P(lst) + 1)^0
- end
- if makefunction then
- return function(str)
- return lpegmatch(pattern,str)
- end
- else
- return pattern
- end
-end
-
--- print(lpeg.match(lpeg.replacer("e","a"),"test test"))
--- print(lpeg.match(lpeg.replacer{{"e","a"}},"test test"))
--- print(lpeg.match(lpeg.replacer({ e = "a", t = "x" }),"test test"))
-
-local splitters_f, splitters_s = { }, { }
-
-function lpeg.firstofsplit(separator) -- always return value
- local splitter = splitters_f[separator]
- if not splitter then
- separator = P(separator)
- splitter = C((1 - separator)^0)
- splitters_f[separator] = splitter
- end
- return splitter
-end
-
-function lpeg.secondofsplit(separator) -- nil if not split
- local splitter = splitters_s[separator]
- if not splitter then
- separator = P(separator)
- splitter = (1 - separator)^0 * separator * C(anything^0)
- splitters_s[separator] = splitter
- end
- return splitter
-end
-
-function lpeg.balancer(left,right)
- left, right = P(left), P(right)
- return P { left * ((1 - left - right) + V(1))^0 * right }
-end
-
-
-
-local nany = utf8char/""
-
-function lpeg.counter(pattern)
- pattern = Cs((P(pattern)/" " + nany)^0)
- return function(str)
- return #lpegmatch(pattern,str)
- end
-end
-
-if utfgmatch then
-
- function lpeg.count(str,what) -- replaces string.count
- if type(what) == "string" then
- local n = 0
- for _ in utfgmatch(str,what) do
- n = n + 1
- end
- return n
- else -- 4 times slower but still faster than / function
- return #lpegmatch(Cs((P(what)/" " + nany)^0),str)
- end
- end
-
-else
-
- local cache = { }
-
- function lpeg.count(str,what) -- replaces string.count
- if type(what) == "string" then
- local p = cache[what]
- if not p then
- p = Cs((P(what)/" " + nany)^0)
- cache[p] = p
- end
- return #lpegmatch(p,str)
- else -- 4 times slower but still faster than / function
- return #lpegmatch(Cs((P(what)/" " + nany)^0),str)
- end
- end
-
-end
-
-local patterns_escapes = { -- also defines in l-string
- ["%"] = "%%",
- ["."] = "%.",
- ["+"] = "%+", ["-"] = "%-", ["*"] = "%*",
- ["["] = "%[", ["]"] = "%]",
- ["("] = "%)", [")"] = "%)",
- -- ["{"] = "%{", ["}"] = "%}"
- -- ["^"] = "%^", ["$"] = "%$",
-}
-
-local simple_escapes = { -- also defines in l-string
- ["-"] = "%-",
- ["."] = "%.",
- ["?"] = ".",
- ["*"] = ".*",
-}
-
-local p = Cs((S("-.+*%()[]") / patterns_escapes + anything)^0)
-local s = Cs((S("-.+*%()[]") / simple_escapes + anything)^0)
-
-function string.escapedpattern(str,simple)
- return lpegmatch(simple and s or p,str)
-end
-
--- utf extensies
-
-lpeg.UP = lpeg.P
-
-if utfcharacters then
-
- function lpeg.US(str)
- local p = P(false)
- for uc in utfcharacters(str) do
- p = p + P(uc)
- end
- return p
- end
-
-
-elseif utfgmatch then
-
- function lpeg.US(str)
- local p = P(false)
- for uc in utfgmatch(str,".") do
- p = p + P(uc)
- end
- return p
- end
-
-else
-
- function lpeg.US(str)
- local p = P(false)
- local f = function(uc)
- p = p + P(uc)
- end
- lpegmatch((utf8char/f)^0,str)
- return p
- end
-
-end
-
-local range = utf8byte * utf8byte + Cc(false) -- utf8byte is already a capture
-
-local utfchar = unicode and unicode.utf8 and unicode.utf8.char
-
-function lpeg.UR(str,more)
- local first, last
- if type(str) == "number" then
- first = str
- last = more or first
- else
- first, last = lpegmatch(range,str)
- if not last then
- return P(str)
- end
- end
- if first == last then
- return P(str)
- elseif utfchar and (last - first < 8) then -- a somewhat arbitrary criterium
- local p = P(false)
- for i=first,last do
- p = p + P(utfchar(i))
- end
- return p -- nil when invalid range
- else
- local f = function(b)
- return b >= first and b <= last
- end
- -- tricky, these nested captures
- return utf8byte / f -- nil when invalid range
- end
-end
-
--- print(lpeg.match(lpeg.Cs((C(lpeg.UR("αω"))/{ ["χ"] = "OEPS" })^0),"αωχαω"))
-
-
-
-function lpeg.is_lpeg(p)
- return p and lpegtype(p) == "pattern"
-end
-
-function lpeg.oneof(list,...) -- lpeg.oneof("elseif","else","if","then") -- assume proper order
- if type(list) ~= "table" then
- list = { list, ... }
- end
- -- table.sort(list) -- longest match first
- local p = P(list[1])
- for l=2,#list do
- p = p + P(list[l])
- end
- return p
-end
-
--- For the moment here, but it might move to utilities. Beware, we need to
--- have the longest keyword first, so 'aaa' comes beforte 'aa' which is why we
--- loop back from the end cq. prepend.
-
-local sort, fastcopy, sortedkeys = table.sort, table.fastcopy, table.sortedkeys -- dependency!
-
-function lpeg.append(list,pp,delayed,checked)
- local p = pp
- if #list > 0 then
- local keys = fastcopy(list)
- sort(keys)
- for i=#keys,1,-1 do
- local k = keys[i]
- if p then
- p = P(k) + p
- else
- p = P(k)
- end
- end
- elseif delayed then -- hm, it looks like the lpeg parser resolves anyway
- local keys = sortedkeys(list)
- if p then
- for i=1,#keys,1 do
- local k = keys[i]
- local v = list[k]
- p = P(k)/list + p
- end
- else
- for i=1,#keys do
- local k = keys[i]
- local v = list[k]
- if p then
- p = P(k) + p
- else
- p = P(k)
- end
- end
- if p then
- p = p / list
- end
- end
- elseif checked then
- -- problem: substitution gives a capture
- local keys = sortedkeys(list)
- for i=1,#keys do
- local k = keys[i]
- local v = list[k]
- if p then
- if k == v then
- p = P(k) + p
- else
- p = P(k)/v + p
- end
- else
- if k == v then
- p = P(k)
- else
- p = P(k)/v
- end
- end
- end
- else
- local keys = sortedkeys(list)
- for i=1,#keys do
- local k = keys[i]
- local v = list[k]
- if p then
- p = P(k)/v + p
- else
- p = P(k)/v
- end
- end
- end
- return p
-end
-
--- inspect(lpeg.append({ a = "1", aa = "1", aaa = "1" } ,nil,true))
--- inspect(lpeg.append({ ["degree celsius"] = "1", celsius = "1", degree = "1" } ,nil,true))
-
--- function lpeg.exact_match(words,case_insensitive)
--- local pattern = concat(words)
--- if case_insensitive then
--- local pattern = S(upper(characters)) + S(lower(characters))
--- local list = { }
--- for i=1,#words do
--- list[lower(words[i])] = true
--- end
--- return Cmt(pattern^1, function(_,i,s)
--- return list[lower(s)] and i
--- end)
--- else
--- local pattern = S(concat(words))
--- local list = { }
--- for i=1,#words do
--- list[words[i]] = true
--- end
--- return Cmt(pattern^1, function(_,i,s)
--- return list[s] and i
--- end)
--- end
--- end
-
--- experiment:
-
-local function make(t)
- local p
--- for k, v in next, t do
- for k, v in table.sortedhash(t) do
- if not p then
- if next(v) then
- p = P(k) * make(v)
- else
- p = P(k)
- end
- else
- if next(v) then
- p = p + P(k) * make(v)
- else
- p = p + P(k)
- end
- end
- end
- return p
-end
-
-function lpeg.utfchartabletopattern(list)
- local tree = { }
- for i=1,#list do
- local t = tree
- for c in gmatch(list[i],".") do
- if not t[c] then
- t[c] = { }
- end
- t = t[c]
- end
- end
- return make(tree)
-end
-
--- inspect ( lpeg.utfchartabletopattern {
--- utfchar(0x00A0), -- nbsp
--- utfchar(0x2000), -- enquad
--- utfchar(0x2001), -- emquad
--- utfchar(0x2002), -- enspace
--- utfchar(0x2003), -- emspace
--- utfchar(0x2004), -- threeperemspace
--- utfchar(0x2005), -- fourperemspace
--- utfchar(0x2006), -- sixperemspace
--- utfchar(0x2007), -- figurespace
--- utfchar(0x2008), -- punctuationspace
--- utfchar(0x2009), -- breakablethinspace
--- utfchar(0x200A), -- hairspace
--- utfchar(0x200B), -- zerowidthspace
--- utfchar(0x202F), -- narrownobreakspace
--- utfchar(0x205F), -- math thinspace
--- } )
-
--- handy from within tex:
-
-local lpegmatch = lpeg.match
-
-local replacer = lpeg.replacer("@","%%") -- Watch the escaped % in lpeg!
-
-function string.tformat(fmt,...)
- return format(lpegmatch(replacer,fmt),...)
-end
-
--- strips leading and trailing spaces and collapsed all other spaces
-
-local pattern = Cs(whitespace^0/"" * ((whitespace^1 * P(-1) / "") + (whitespace^1/" ") + P(1))^0)
-
-function string.collapsespaces(str)
- return lpegmatch(pattern,str)
-end
-
-
-end -- of closure
-
-do -- create closure to overcome 200 locals limit
-
if not modules then modules = { } end modules ['l-io'] = {
version = 1.001,
comment = "companion to luat-lib.mkiv",
@@ -2368,7 +2526,7 @@ function io.readstring(f,n,m)
f:seek("set",n)
n = m
end
- local str = gsub(f:read(n),"%z","")
+ local str = gsub(f:read(n),"\000","")
return str
end
@@ -2418,10 +2576,129 @@ local lpegmatch = lpeg.match
number = number or { }
local number = number
--- a,b,c,d,e,f = number.toset(100101)
+if bit32 then
+
+ local btest, bor = bit32.btest, bit32.bor
+
+ function number.bit(p)
+ return 2 ^ (p - 1) -- 1-based indexing
+ end
+
+ number.hasbit = btest
+ number.setbit = bor
+
+ function number.setbit(x,p)
+ return btest(x,p) and x or x + p
+ end
+
+ function number.clearbit(x,p)
+ return btest(x,p) and x - p or x
+ end
+
+else
+
+ -- http://ricilake.blogspot.com/2007/10/iterating-bits-in-lua.html
+
+ function number.bit(p)
+ return 2 ^ (p - 1) -- 1-based indexing
+ end
+
+ function number.hasbit(x, p) -- typical call: if hasbit(x, bit(3)) then ...
+ return x % (p + p) >= p
+ end
+
+ function number.setbit(x, p)
+ return (x % (p + p) >= p) and x or x + p
+ end
+
+ function number.clearbit(x, p)
+ return (x % (p + p) >= p) and x - p or x
+ end
+
+end
+
+-- print(number.tobitstring(8))
+-- print(number.tobitstring(14))
+-- print(number.tobitstring(66))
+-- print(number.tobitstring(0x00))
+-- print(number.tobitstring(0xFF))
+-- print(number.tobitstring(46260767936,4))
+
+if bit32 then
+
+ local bextract = bit32.extract
+
+ local t = {
+ "0", "0", "0", "0", "0", "0", "0", "0",
+ "0", "0", "0", "0", "0", "0", "0", "0",
+ "0", "0", "0", "0", "0", "0", "0", "0",
+ "0", "0", "0", "0", "0", "0", "0", "0",
+ }
+
+ function number.tobitstring(b,m)
+ -- if really needed we can speed this one up
+ -- because small numbers need less extraction
+ local n = 32
+ for i=0,31 do
+ local v = bextract(b,i)
+ local k = 32 - i
+ if v == 1 then
+ n = k
+ t[k] = "1"
+ else
+ t[k] = "0"
+ end
+ end
+ if m then
+ m = 33 - m * 8
+ if m < 1 then
+ m = 1
+ end
+ return concat(t,"",m)
+ elseif n < 8 then
+ return concat(t)
+ elseif n < 16 then
+ return concat(t,"",9)
+ elseif n < 24 then
+ return concat(t,"",17)
+ else
+ return concat(t,"",25)
+ end
+ end
+
+else
+
+ function number.tobitstring(n,m)
+ if n > 0 then
+ local t = { }
+ while n > 0 do
+ insert(t,1,n % 2 > 0 and 1 or 0)
+ n = floor(n/2)
+ end
+ local nn = 8 - #t % 8
+ if nn > 0 and nn < 8 then
+ for i=1,nn do
+ insert(t,1,0)
+ end
+ end
+ if m then
+ m = m * 8 - #t
+ if m > 0 then
+ insert(t,1,rep("0",m))
+ end
+ end
+ return concat(t)
+ elseif m then
+ rep("00000000",m)
+ else
+ return "00000000"
+ end
+ end
-function number.toset(n)
- return match(tostring(n),"(.?)(.?)(.?)(.?)(.?)(.?)(.?)(.?)")
+end
+
+function number.valid(str,default)
+ return tonumber(str) or default or nil
end
function number.toevenhex(n)
@@ -2433,86 +2710,59 @@ function number.toevenhex(n)
end
end
--- the lpeg way is slower on 8 digits, but faster on 4 digits, some 7.5%
--- on
+-- a,b,c,d,e,f = number.toset(100101)
+--
+-- function number.toset(n)
+-- return match(tostring(n),"(.?)(.?)(.?)(.?)(.?)(.?)(.?)(.?)")
+-- end
+--
+-- -- the lpeg way is slower on 8 digits, but faster on 4 digits, some 7.5%
+-- -- on
--
-- for i=1,1000000 do
-- local a,b,c,d,e,f,g,h = number.toset(12345678)
-- local a,b,c,d = number.toset(1234)
-- local a,b,c = number.toset(123)
+-- local a,b,c = number.toset("123")
-- end
---
--- of course dedicated "(.)(.)(.)(.)" matches are even faster
-local one = lpeg.C(1-lpeg.S(''))^1
+local one = lpeg.C(1-lpeg.S('')/tonumber)^1
function number.toset(n)
return lpegmatch(one,tostring(n))
end
-function number.bits(n,zero)
- local t, i = { }, (zero and 0) or 1
- while n > 0 do
+-- function number.bits(n,zero)
+-- local t, i = { }, (zero and 0) or 1
+-- while n > 0 do
+-- local m = n % 2
+-- if m > 0 then
+-- insert(t,1,i)
+-- end
+-- n = floor(n/2)
+-- i = i + 1
+-- end
+-- return t
+-- end
+--
+-- -- a bit faster
+
+local function bits(n,i,...)
+ if n > 0 then
local m = n % 2
+ local n = floor(n/2)
if m > 0 then
- insert(t,1,i)
- end
- n = floor(n/2)
- i = i + 1
- end
- return t
-end
-
-
-function number.bit(p)
- return 2 ^ (p - 1) -- 1-based indexing
-end
-
-function number.hasbit(x, p) -- typical call: if hasbit(x, bit(3)) then ...
- return x % (p + p) >= p
-end
-
-function number.setbit(x, p)
- return (x % (p + p) >= p) and x or x + p
-end
-
-function number.clearbit(x, p)
- return (x % (p + p) >= p) and x - p or x
-end
-
-
-function number.tobitstring(n,m)
- if n == 0 then
- if m then
- rep("00000000",m)
+ return bits(n, i+1, i, ...)
else
- return "00000000"
+ return bits(n, i+1, ...)
end
else
- local t = { }
- while n > 0 do
- insert(t,1,n % 2 > 0 and 1 or 0)
- n = floor(n/2)
- end
- local nn = 8 - #t % 8
- if nn > 0 and nn < 8 then
- for i=1,nn do
- insert(t,1,0)
- end
- end
- if m then
- m = m * 8 - #t
- if m > 0 then
- insert(t,1,rep("0",m))
- end
- end
- return concat(t)
+ return ...
end
end
-
-function number.valid(str,default)
- return tonumber(str) or default or nil
+function number.bits(n)
+ return { bits(n,1) }
end
@@ -3104,25 +3354,25 @@ local suffix = period/"" * (1-period-slashes)^1 * -1
local pattern = C((noslashes^0 * slashes^1)^1)
local function pathpart(name,default)
- return lpegmatch(pattern,name) or default or ""
+ return name and lpegmatch(pattern,name) or default or ""
end
local pattern = (noslashes^0 * slashes)^1 * C(noslashes^1) * -1
local function basename(name)
- return lpegmatch(pattern,name) or name
+ return name and lpegmatch(pattern,name) or name
end
local pattern = (noslashes^0 * slashes^1)^0 * Cs((1-suffix)^1) * suffix^0
local function nameonly(name)
- return lpegmatch(pattern,name) or name
+ return name and lpegmatch(pattern,name) or name
end
local pattern = (noslashes^0 * slashes)^0 * (noperiod^1 * period)^1 * C(noperiod^1) * -1
local function suffixonly(name)
- return lpegmatch(pattern,name) or ""
+ return name and lpegmatch(pattern,name) or ""
end
file.pathpart = pathpart
@@ -3153,7 +3403,9 @@ local pattern_c = C(drive * path) * C(base * suffix) -- trick: two extra capture
local pattern_d = path * rest
function file.splitname(str,splitdrive)
- if splitdrive then
+ if not str then
+ -- error
+ elseif splitdrive then
return lpegmatch(pattern_a,str) -- returns drive, path, base, suffix
else
return lpegmatch(pattern_b,str) -- returns path, base, suffix
@@ -3161,34 +3413,36 @@ function file.splitname(str,splitdrive)
end
function file.splitbase(str)
- return lpegmatch(pattern_d,str) -- returns path, base+suffix
+ return str and lpegmatch(pattern_d,str) -- returns path, base+suffix
end
function file.nametotable(str,splitdrive) -- returns table
- local path, drive, subpath, name, base, suffix = lpegmatch(pattern_c,str)
- if splitdrive then
- return {
- path = path,
- drive = drive,
- subpath = subpath,
- name = name,
- base = base,
- suffix = suffix,
- }
- else
- return {
- path = path,
- name = name,
- base = base,
- suffix = suffix,
- }
+ if str then
+ local path, drive, subpath, name, base, suffix = lpegmatch(pattern_c,str)
+ if splitdrive then
+ return {
+ path = path,
+ drive = drive,
+ subpath = subpath,
+ name = name,
+ base = base,
+ suffix = suffix,
+ }
+ else
+ return {
+ path = path,
+ name = name,
+ base = base,
+ suffix = suffix,
+ }
+ end
end
end
local pattern = Cs(((period * noperiod^1 * -1)/"" + 1)^1)
function file.removesuffix(name)
- return lpegmatch(pattern,name)
+ return name and lpegmatch(pattern,name)
end
-- local pattern = (noslashes^0 * slashes)^0 * (noperiod^1 * period)^1 * Cp() * noperiod^1 * -1
@@ -3205,8 +3459,8 @@ end
local suffix = period/"" * (1-period-slashes)^1 * -1
local pattern = Cs((noslashes^0 * slashes^1)^0 * ((1-suffix)^1)) * Cs(suffix)
-function file.addsuffix(filename, suffix, criterium)
- if not suffix or suffix == "" then
+function file.addsuffix(filename,suffix,criterium)
+ if not filename or not suffix or suffix == "" then
return filename
elseif criterium == true then
return filename .. "." .. suffix
@@ -3252,7 +3506,7 @@ local suffix = period * (1-period-slashes)^1 * -1
local pattern = Cs((1-suffix)^0)
function file.replacesuffix(name,suffix)
- if suffix and suffix ~= "" then
+ if name and suffix and suffix ~= "" then
return lpegmatch(pattern,name) .. "." .. suffix
else
return name
@@ -3261,10 +3515,10 @@ end
--
-local reslasher = lpeg.replacer(S("\\"),"/")
+local reslasher = lpeg.replacer(P("\\"),"/")
function file.reslash(str)
- return lpegmatch(reslasher,str)
+ return str and lpegmatch(reslasher,str)
end
-- We should be able to use:
@@ -3280,7 +3534,9 @@ end
-- variant:
function file.is_writable(name)
- if lfs.isdir(name) then
+ if not name then
+ -- error
+ elseif lfs.isdir(name) then
name = name .. "/m_t_x_t_e_s_t.tmp"
local f = io.open(name,"wb")
if f then
@@ -3308,24 +3564,32 @@ end
local readable = P("r") * Cc(true)
function file.is_readable(name)
- local a = attributes(name)
- return a and lpegmatch(readable,a.permissions) or false
+ if name then
+ local a = attributes(name)
+ return a and lpegmatch(readable,a.permissions) or false
+ else
+ return false
+ end
end
file.isreadable = file.is_readable -- depricated
file.iswritable = file.is_writable -- depricated
function file.size(name)
- local a = attributes(name)
- return a and a.size or 0
+ if name then
+ local a = attributes(name)
+ return a and a.size or 0
+ else
+ return 0
+ end
end
function file.splitpath(str,separator) -- string .. reslash is a bonus (we could do a direct split)
- return checkedsplit(lpegmatch(reslasher,str),separator or io.pathseparator)
+ return str and checkedsplit(lpegmatch(reslasher,str),separator or io.pathseparator)
end
function file.joinpath(tab,separator) -- table
- return concat(tab,separator or io.pathseparator) -- can have trailing //
+ return tab and concat(tab,separator or io.pathseparator) -- can have trailing //
end
local stripper = Cs(P(fwslash)^0/"" * reslasher)
@@ -3333,14 +3597,23 @@ local isnetwork = fwslash * fwslash * (1-fwslash) + (1-fwslash-colon)^1 * colon
local isroot = fwslash^1 * -1
local hasroot = fwslash^1
-function file.join(...) -- rather dirty
+local deslasher = lpeg.replacer(S("\\/")^1,"/")
+
+-- If we have a network or prefix then there is a change that we end up with two
+-- // in the middle ... we could prevent this if we (1) expand prefixes: and (2)
+-- split and rebuild as url. Of course we could assume no network paths (which
+-- makes sense) adn assume either mapped drives (windows) or mounts (unix) but
+-- then we still have to deal with urls ... anyhow, multiple // are never a real
+-- problem but just ugly.
+
+function file.join(...)
local lst = { ... }
local one = lst[1]
if lpegmatch(isnetwork,one) then
- local two = lpegmatch(reslasher,concat(lst,"/",2))
+ local two = lpegmatch(deslasher,concat(lst,"/",2))
return one .. "/" .. two
elseif lpegmatch(isroot,one) then
- local two = lpegmatch(reslasher,concat(lst,"/",2))
+ local two = lpegmatch(deslasher,concat(lst,"/",2))
if lpegmatch(hasroot,two) then
return two
else
@@ -3349,7 +3622,7 @@ function file.join(...) -- rather dirty
elseif one == "" then
return lpegmatch(stripper,concat(lst,"/",2))
else
- return lpegmatch(reslasher,concat(lst,"/"))
+ return lpegmatch(deslasher,concat(lst,"/"))
end
end
@@ -3378,6 +3651,9 @@ local splitstarter = (Cs(drivespec * (bwslash/"/" + fwslash)^0) + Cc(false)) * C
local absolute = fwslash
function file.collapsepath(str,anchor)
+ if not str then
+ return
+ end
if anchor and not lpegmatch(anchors,str) then
str = getcurrentdir() .. "/" .. str
end
@@ -3387,7 +3663,6 @@ function file.collapsepath(str,anchor)
return lpegmatch(reslasher,str)
end
local starter, oldelements = lpegmatch(splitstarter,str)
--- inspect(oldelements)
local newelements = { }
local i = #oldelements
while i > 0 do
@@ -3441,11 +3716,13 @@ local whatever = P("-")^0 / ""
local pattern_b = Cs(whatever * (1 - whatever * -1)^1)
function file.robustname(str,strict)
- str = lpegmatch(pattern_a,str) or str
- if strict then
- return lpegmatch(pattern_b,str) or str -- two step is cleaner (less backtracking)
- else
- return str
+ if str then
+ str = lpegmatch(pattern_a,str) or str
+ if strict then
+ return lpegmatch(pattern_b,str) or str -- two step is cleaner (less backtracking)
+ else
+ return str
+ end
end
end
@@ -3453,7 +3730,9 @@ file.readdata = io.loaddata
file.savedata = io.savedata
function file.copy(oldname,newname)
- file.savedata(newname,io.loaddata(oldname))
+ if oldname and newname then
+ file.savedata(newname,io.loaddata(oldname))
+ end
end
-- also rewrite previous
@@ -3474,11 +3753,11 @@ lpeg.patterns.rootbased = rootbased
-- ./name ../name /name c: :// name/name
function file.is_qualified_path(filename)
- return lpegmatch(qualified,filename) ~= nil
+ return filename and lpegmatch(qualified,filename) ~= nil
end
function file.is_rootbased_path(filename)
- return lpegmatch(rootbased,filename) ~= nil
+ return filename and lpegmatch(rootbased,filename) ~= nil
end
-- function test(t) for k, v in next, t do print(v, "=>", file.splitname(v)) end end
@@ -3500,8 +3779,10 @@ end
-- for myself:
function file.strip(name,dir)
- local b, a = match(name,"^(.-)" .. dir .. "(.*)$")
- return a ~= "" and a or name
+ if name then
+ local b, a = match(name,"^(.-)" .. dir .. "(.*)$")
+ return a ~= "" and a or name
+ end
end
-- local debuglist = {
@@ -3943,7 +4224,7 @@ if not modules then modules = { } end modules ['l-dir'] = {
-- dir.expandname will be merged with cleanpath and collapsepath
-local type = type
+local type, select = type, select
local find, gmatch, match, gsub = string.find, string.gmatch, string.match, string.gsub
local concat, insert, remove = table.concat, table.insert, table.remove
local lpegmatch = lpeg.match
@@ -4165,15 +4446,15 @@ local onwindows = os.type == "windows" or find(os.getenv("PATH"),";")
if onwindows then
function dir.mkdirs(...)
- local str, pth, t = "", "", { ... }
- for i=1,#t do
- local s = t[i]
- if s ~= "" then
- if str ~= "" then
- str = str .. "/" .. s
- else
- str = s
- end
+ local str, pth = "", ""
+ for i=1,select("#",...) do
+ local s = select(i,...)
+ if s == "" then
+ -- skip
+ elseif str == "" then
+ str = s
+ else
+ str = str .. "/" .. s
end
end
local first, middle, last
@@ -4222,9 +4503,9 @@ if onwindows then
else
function dir.mkdirs(...)
- local str, pth, t = "", "", { ... }
- for i=1,#t do
- local s = t[i]
+ local str, pth = "", ""
+ for i=1,select("#",...) do
+ local s = select(i,...)
if s and s ~= "" then -- we catch nil and false
if str ~= "" then
str = str .. "/" .. s
@@ -4424,29 +4705,45 @@ if not modules then modules = { } end modules ['l-unicode'] = {
-- todo: utf.sub replacement (used in syst-aux)
-local concat = table.concat
+-- we put these in the utf namespace:
+
+utf = utf or (unicode and unicode.utf8) or { }
+
+utf.characters = utf.characters or string.utfcharacters
+utf.values = utf.values or string.utfvalues
+
+-- string.utfvalues
+-- string.utfcharacters
+-- string.characters
+-- string.characterpairs
+-- string.bytes
+-- string.bytepairs
+
local type = type
-local P, C, R, Cs, Ct, Cmt = lpeg.P, lpeg.C, lpeg.R, lpeg.Cs, lpeg.Ct, lpeg.Cmt
+local char, byte, format, sub = string.char, string.byte, string.format, string.sub
+local concat = table.concat
+local P, C, R, Cs, Ct, Cmt, Cc, Carg = lpeg.P, lpeg.C, lpeg.R, lpeg.Cs, lpeg.Ct, lpeg.Cmt, lpeg.Cc, lpeg.Carg
local lpegmatch, patterns = lpeg.match, lpeg.patterns
-local utftype = patterns.utftype
-local char, byte, find, bytepairs, utfvalues, format, sub = string.char, string.byte, string.find, string.bytepairs, string.utfvalues, string.format, string.sub
-local utfsplitlines = string.utfsplitlines
-if not unicode then
+local bytepairs = string.bytepairs
- unicode = { }
+local finder = lpeg.finder
+local replacer = lpeg.replacer
-end
+local utfvalues = utf.values
+local utfgmatch = utf.gmatch -- not always present
-local unicode = unicode
+local p_utftype = patterns.utftype
+local p_utfoffset = patterns.utfoffset
+local p_utf8char = patterns.utf8char
+local p_utf8byte = patterns.utf8byte
+local p_utfbom = patterns.utfbom
+local p_newline = patterns.newline
+local p_whitespace = patterns.whitespace
-utf = utf or unicode.utf8
-
-if not utf then
+if not unicode then
- utf8 = { }
- unicode.utf8 = utf8
- utf = utf8
+ unicode = { utf = utf } -- for a while
end
@@ -4503,64 +4800,13 @@ if not utf.byte then
end
-if not utf.sub then
-
- local utf8char = patterns.utf8char
-
- -- inefficient as lpeg just copies ^n
-
- -- local function sub(str,start,stop)
- -- local pattern = utf8char^-(start-1) * C(utf8char^-(stop-start+1))
- -- inspect(pattern)
- -- return lpegmatch(pattern,str) or ""
- -- end
-
- local b, e, n, first, last = 0, 0, 0, 0, 0
-
- local function slide(s,p)
- n = n + 1
- if n == first then
- b = p
- if not last then
- return nil
- end
- end
- if n == last then
- e = p
- return nil
- else
- return p
- end
- end
-
- local pattern = Cmt(utf8char,slide)^0
-
- function utf.sub(str,start,stop) -- todo: from the end
- if not start then
- return str
- end
- b, e, n, first, last = 0, 0, 0, start, stop
- lpegmatch(pattern,str)
- if not stop then
- return sub(str,b)
- else
- return sub(str,b,e)
- end
- end
-
- -- print(utf.sub("Hans Hagen is my name"))
- -- print(utf.sub("Hans Hagen is my name",5))
- -- print(utf.sub("Hans Hagen is my name",5,10))
-
-end
-
local utfchar, utfbyte = utf.char, utf.byte
-- As we want to get rid of the (unmaintained) utf library we implement our own
-- variants (in due time an independent module):
-function unicode.filetype(data)
- return data and lpegmatch(utftype,data) or "unknown"
+function utf.filetype(data)
+ return data and lpegmatch(p_utftype,data) or "unknown"
end
local toentities = Cs (
@@ -4647,7 +4893,7 @@ local pattern = P("\254\255") * Cs( (
+ one
)^1 )
-function string.toutf(s)
+function string.toutf(s) -- in string namespace
return lpegmatch(pattern,s) or s -- todo: utf32
end
@@ -4663,26 +4909,269 @@ local validatedutf = Cs (
patterns.validatedutf = validatedutf
-function string.validutf(str)
- return lpegmatch(validatedutf,str)
+function utf.is_valid(str)
+ return type(str) == "string" and lpegmatch(validatedutf,str) or false
end
+if not utf.len then
-utf.length = string.utflength
-utf.split = string.utfsplit
-utf.splitines = string.utfsplitlines
-utf.valid = string.validutf
+ -- -- alternative 1: 0.77
+ --
+ -- local utfcharcounter = utfbom^-1 * Cs((p_utf8char/'!')^0)
+ --
+ -- function utf.len(str)
+ -- return #lpegmatch(utfcharcounter,str or "")
+ -- end
+ --
+ -- -- alternative 2: 1.70
+ --
+ -- local n = 0
+ --
+ -- local utfcharcounter = utfbom^-1 * (p_utf8char/function() n = n + 1 end)^0 -- slow
+ --
+ -- function utf.length(str)
+ -- n = 0
+ -- lpegmatch(utfcharcounter,str or "")
+ -- return n
+ -- end
+ --
+ -- -- alternative 3: 0.24 (native unicode.utf8.len: 0.047)
+
+ -- local n = 0
+ --
+ -- -- local utfcharcounter = lpeg.patterns.utfbom^-1 * P ( ( Cp() * (
+ -- -- patterns.utf8one ^1 * Cc(1)
+ -- -- + patterns.utf8two ^1 * Cc(2)
+ -- -- + patterns.utf8three^1 * Cc(3)
+ -- -- + patterns.utf8four ^1 * Cc(4) ) * Cp() / function(f,d,t) n = n + (t - f)/d end
+ -- -- )^0 ) -- just as many captures as below
+ --
+ -- -- local utfcharcounter = lpeg.patterns.utfbom^-1 * P ( (
+ -- -- (Cmt(patterns.utf8one ^1,function(_,_,s) n = n + #s return true end))
+ -- -- + (Cmt(patterns.utf8two ^1,function(_,_,s) n = n + #s/2 return true end))
+ -- -- + (Cmt(patterns.utf8three^1,function(_,_,s) n = n + #s/3 return true end))
+ -- -- + (Cmt(patterns.utf8four ^1,function(_,_,s) n = n + #s/4 return true end))
+ -- -- )^0 ) -- not interesting as it creates strings but sometimes faster
+ --
+ -- -- The best so far:
+ --
+ -- local utfcharcounter = utfbom^-1 * P ( (
+ -- Cp() * (patterns.utf8one )^1 * Cp() / function(f,t) n = n + t - f end
+ -- + Cp() * (patterns.utf8two )^1 * Cp() / function(f,t) n = n + (t - f)/2 end
+ -- + Cp() * (patterns.utf8three)^1 * Cp() / function(f,t) n = n + (t - f)/3 end
+ -- + Cp() * (patterns.utf8four )^1 * Cp() / function(f,t) n = n + (t - f)/4 end
+ -- )^0 )
+
+ -- function utf.len(str)
+ -- n = 0
+ -- lpegmatch(utfcharcounter,str or "")
+ -- return n
+ -- end
+
+ local n, f = 0, 1
+
+ local utfcharcounter = patterns.utfbom^-1 * Cmt (
+ Cc(1) * patterns.utf8one ^1
+ + Cc(2) * patterns.utf8two ^1
+ + Cc(3) * patterns.utf8three^1
+ + Cc(4) * patterns.utf8four ^1,
+ function(_,t,d) -- due to Cc no string captures, so faster
+ n = n + (t - f)/d
+ f = t
+ return true
+ end
+ )^0
+
+ function utf.len(str)
+ n, f = 0, 1
+ lpegmatch(utfcharcounter,str or "")
+ return n
+ end
-if not utf.len then
- utf.len = utf.length
end
--- a replacement for simple gsubs:
+utf.length = utf.len
+
+if not utf.sub then
+
+ -- inefficient as lpeg just copies ^n
+
+ -- local function sub(str,start,stop)
+ -- local pattern = p_utf8char^-(start-1) * C(p_utf8char^-(stop-start+1))
+ -- inspect(pattern)
+ -- return lpegmatch(pattern,str) or ""
+ -- end
+
+ -- local b, e, n, first, last = 0, 0, 0, 0, 0
+ --
+ -- local function slide(s,p)
+ -- n = n + 1
+ -- if n == first then
+ -- b = p
+ -- if not last then
+ -- return nil
+ -- end
+ -- end
+ -- if n == last then
+ -- e = p
+ -- return nil
+ -- else
+ -- return p
+ -- end
+ -- end
+ --
+ -- local pattern = Cmt(p_utf8char,slide)^0
+ --
+ -- function utf.sub(str,start,stop) -- todo: from the end
+ -- if not start then
+ -- return str
+ -- end
+ -- b, e, n, first, last = 0, 0, 0, start, stop
+ -- lpegmatch(pattern,str)
+ -- if not stop then
+ -- return sub(str,b)
+ -- else
+ -- return sub(str,b,e-1)
+ -- end
+ -- end
+
+ -- print(utf.sub("Hans Hagen is my name"))
+ -- print(utf.sub("Hans Hagen is my name",5))
+ -- print(utf.sub("Hans Hagen is my name",5,10))
+
+ local utflength = utf.length
+
+ -- also negative indices, upto 10 times slower than a c variant
+
+ local b, e, n, first, last = 0, 0, 0, 0, 0
+
+ local function slide_zero(s,p)
+ n = n + 1
+ if n >= last then
+ e = p - 1
+ else
+ return p
+ end
+ end
+
+ local function slide_one(s,p)
+ n = n + 1
+ if n == first then
+ b = p
+ end
+ if n >= last then
+ e = p - 1
+ else
+ return p
+ end
+ end
+
+ local function slide_two(s,p)
+ n = n + 1
+ if n == first then
+ b = p
+ else
+ return true
+ end
+ end
+
+ local pattern_zero = Cmt(p_utf8char,slide_zero)^0
+ local pattern_one = Cmt(p_utf8char,slide_one )^0
+ local pattern_two = Cmt(p_utf8char,slide_two )^0
+
+ function utf.sub(str,start,stop)
+ if not start then
+ return str
+ end
+ if start == 0 then
+ start = 1
+ end
+ if not stop then
+ if start < 0 then
+ local l = utflength(str) -- we can inline this function if needed
+ start = l + start
+ else
+ start = start - 1
+ end
+ b, n, first = 0, 0, start
+ lpegmatch(pattern_two,str)
+ if n >= first then
+ return sub(str,b)
+ else
+ return ""
+ end
+ end
+ if start < 0 or stop < 0 then
+ local l = utf.length(str)
+ if start < 0 then
+ start = l + start
+ if start <= 0 then
+ start = 1
+ else
+ start = start + 1
+ end
+ end
+ if stop < 0 then
+ stop = l + stop
+ if stop == 0 then
+ stop = 1
+ else
+ stop = stop + 1
+ end
+ end
+ end
+ if start > stop then
+ return ""
+ elseif start > 1 then
+ b, e, n, first, last = 0, 0, 0, start - 1, stop
+ lpegmatch(pattern_one,str)
+ if n >= first and e == 0 then
+ e = #str
+ end
+ return sub(str,b,e)
+ else
+ b, e, n, last = 1, 0, 0, stop
+ lpegmatch(pattern_zero,str)
+ if e == 0 then
+ e = #str
+ end
+ return sub(str,b,e)
+ end
+ end
-local utf8char = patterns.utf8char
+ -- local n = 100000
+ -- local str = string.rep("123456àáâãäå",100)
+ --
+ -- for i=-15,15,1 do
+ -- for j=-15,15,1 do
+ -- if utf.xsub(str,i,j) ~= utf.sub(str,i,j) then
+ -- print("error",i,j,"l>"..utf.xsub(str,i,j),"s>"..utf.sub(str,i,j))
+ -- end
+ -- end
+ -- if utf.xsub(str,i) ~= utf.sub(str,i) then
+ -- print("error",i,"l>"..utf.xsub(str,i),"s>"..utf.sub(str,i))
+ -- end
+ -- end
+
+ -- print(" 1, 7",utf.xsub(str, 1, 7),utf.sub(str, 1, 7))
+ -- print(" 0, 7",utf.xsub(str, 0, 7),utf.sub(str, 0, 7))
+ -- print(" 0, 9",utf.xsub(str, 0, 9),utf.sub(str, 0, 9))
+ -- print(" 4 ",utf.xsub(str, 4 ),utf.sub(str, 4 ))
+ -- print(" 0 ",utf.xsub(str, 0 ),utf.sub(str, 0 ))
+ -- print(" 0, 0",utf.xsub(str, 0, 0),utf.sub(str, 0, 0))
+ -- print(" 4, 4",utf.xsub(str, 4, 4),utf.sub(str, 4, 4))
+ -- print(" 4, 0",utf.xsub(str, 4, 0),utf.sub(str, 4, 0))
+ -- print("-3, 0",utf.xsub(str,-3, 0),utf.sub(str,-3, 0))
+ -- print(" 0,-3",utf.xsub(str, 0,-3),utf.sub(str, 0,-3))
+ -- print(" 5,-3",utf.xsub(str,-5,-3),utf.sub(str,-5,-3))
+ -- print("-3 ",utf.xsub(str,-3 ),utf.sub(str,-3 ))
+
+end
+
+-- a replacement for simple gsubs:
function utf.remapper(mapping)
- local pattern = Cs((utf8char/mapping)^0)
+ local pattern = Cs((p_utf8char/mapping)^0)
return function(str)
if not str or str == "" then
return ""
@@ -4695,55 +5184,113 @@ end
-- local remap = utf.remapper { a = 'd', b = "c", c = "b", d = "a" }
-- print(remap("abcd 1234 abcd"))
+--
+
+function utf.replacer(t) -- no precheck, always string builder
+ local r = replacer(t,false,false,true)
+ return function(str)
+ return lpegmatch(r,str)
+ end
+end
+
+function utf.subtituter(t) -- with precheck and no building if no match
+ local f = finder (t)
+ local r = replacer(t,false,false,true)
+ return function(str)
+ local i = lpegmatch(f,str)
+ if not i then
+ return str
+ elseif i > #str then
+ return str
+ else
+ -- return sub(str,1,i-2) .. lpegmatch(r,str,i-1) -- slower
+ return lpegmatch(r,str)
+ end
+ end
+end
+
+-- inspect(utf.split("a b c d"))
+-- inspect(utf.split("a b c d",true))
+
+local utflinesplitter = p_utfbom^-1 * lpeg.tsplitat(p_newline)
+local utfcharsplitter_ows = p_utfbom^-1 * Ct(C(p_utf8char)^0)
+local utfcharsplitter_iws = p_utfbom^-1 * Ct((p_whitespace^1 + C(p_utf8char))^0)
+local utfcharsplitter_raw = Ct(C(p_utf8char)^0)
+
+patterns.utflinesplitter = utflinesplitter
+
+function utf.splitlines(str)
+ return lpegmatch(utflinesplitter,str or "")
+end
+
+function utf.split(str,ignorewhitespace) -- new
+ if ignorewhitespace then
+ return lpegmatch(utfcharsplitter_iws,str or "")
+ else
+ return lpegmatch(utfcharsplitter_ows,str or "")
+ end
+end
+
+function utf.totable(str) -- keeps bom
+ return lpegmatch(utfcharsplitter_raw,str)
+end
+
-- 0 EF BB BF UTF-8
-- 1 FF FE UTF-16-little-endian
-- 2 FE FF UTF-16-big-endian
-- 3 FF FE 00 00 UTF-32-little-endian
-- 4 00 00 FE FF UTF-32-big-endian
-
-unicode.utfname = {
- [0] = 'utf-8',
- [1] = 'utf-16-le',
- [2] = 'utf-16-be',
- [3] = 'utf-32-le',
- [4] = 'utf-32-be'
-}
-
+--
-- \000 fails in <= 5.0 but is valid in >=5.1 where %z is depricated
-function unicode.utftype(f)
- local str = f:read(4)
- if not str then
- f:seek('set')
- return 0
- -- elseif find(str,"^%z%z\254\255") then -- depricated
- -- elseif find(str,"^\000\000\254\255") then -- not permitted and bugged
- elseif find(str,"\000\000\254\255",1,true) then -- seems to work okay (TH)
- return 4
- -- elseif find(str,"^\255\254%z%z") then -- depricated
- -- elseif find(str,"^\255\254\000\000") then -- not permitted and bugged
- elseif find(str,"\255\254\000\000",1,true) then -- seems to work okay (TH)
- return 3
- elseif find(str,"^\254\255") then
- f:seek('set',2)
- return 2
- elseif find(str,"^\255\254") then
- f:seek('set',2)
- return 1
- elseif find(str,"^\239\187\191") then
- f:seek('set',3)
- return 0
- else
- f:seek('set')
- return 0
+-- utf.name = {
+-- [0] = 'utf-8',
+-- [1] = 'utf-16-le',
+-- [2] = 'utf-16-be',
+-- [3] = 'utf-32-le',
+-- [4] = 'utf-32-be'
+-- }
+--
+-- function utf.magic(f)
+-- local str = f:read(4)
+-- if not str then
+-- f:seek('set')
+-- return 0
+-- -- elseif find(str,"^%z%z\254\255") then -- depricated
+-- -- elseif find(str,"^\000\000\254\255") then -- not permitted and bugged
+-- elseif find(str,"\000\000\254\255",1,true) then -- seems to work okay (TH)
+-- return 4
+-- -- elseif find(str,"^\255\254%z%z") then -- depricated
+-- -- elseif find(str,"^\255\254\000\000") then -- not permitted and bugged
+-- elseif find(str,"\255\254\000\000",1,true) then -- seems to work okay (TH)
+-- return 3
+-- elseif find(str,"^\254\255") then
+-- f:seek('set',2)
+-- return 2
+-- elseif find(str,"^\255\254") then
+-- f:seek('set',2)
+-- return 1
+-- elseif find(str,"^\239\187\191") then
+-- f:seek('set',3)
+-- return 0
+-- else
+-- f:seek('set')
+-- return 0
+-- end
+-- end
+
+function utf.magic(f) -- not used
+ local str = f:read(4) or ""
+ local off = lpegmatch(p_utfoffset,str)
+ if off < 4 then
+ f:seek('set',off)
end
+ return lpegmatch(p_utftype,str)
end
-
-
local function utf16_to_utf8_be(t)
if type(t) == "string" then
- t = utfsplitlines(str)
+ t = lpegmatch(utflinesplitter,t)
end
local result = { } -- we reuse result
for i=1,#t do
@@ -4771,7 +5318,7 @@ end
local function utf16_to_utf8_le(t)
if type(t) == "string" then
- t = utfsplitlines(str)
+ t = lpegmatch(utflinesplitter,t)
end
local result = { } -- we reuse result
for i=1,#t do
@@ -4799,7 +5346,7 @@ end
local function utf32_to_utf8_be(t)
if type(t) == "string" then
- t = utfsplitlines(t)
+ t = lpegmatch(utflinesplitter,t)
end
local result = { } -- we reuse result
for i=1,#t do
@@ -4824,7 +5371,7 @@ end
local function utf32_to_utf8_le(t)
if type(t) == "string" then
- t = utfsplitlines(t)
+ t = lpegmatch(utflinesplitter,t)
end
local result = { } -- we reuse result
for i=1,#t do
@@ -4847,20 +5394,20 @@ local function utf32_to_utf8_le(t)
return t
end
-unicode.utf32_to_utf8_be = utf32_to_utf8_be
-unicode.utf32_to_utf8_le = utf32_to_utf8_le
-unicode.utf16_to_utf8_be = utf16_to_utf8_be
-unicode.utf16_to_utf8_le = utf16_to_utf8_le
+utf.utf32_to_utf8_be = utf32_to_utf8_be
+utf.utf32_to_utf8_le = utf32_to_utf8_le
+utf.utf16_to_utf8_be = utf16_to_utf8_be
+utf.utf16_to_utf8_le = utf16_to_utf8_le
-function unicode.utf8_to_utf8(t)
- return type(t) == "string" and utfsplitlines(t) or t
+function utf.utf8_to_utf8(t)
+ return type(t) == "string" and lpegmatch(utflinesplitter,t) or t
end
-function unicode.utf16_to_utf8(t,endian)
+function utf.utf16_to_utf8(t,endian)
return endian and utf16_to_utf8_be(t) or utf16_to_utf8_le(t) or t
end
-function unicode.utf32_to_utf8(t,endian)
+function utf.utf32_to_utf8(t,endian)
return endian and utf32_to_utf8_be(t) or utf32_to_utf8_le(t) or t
end
@@ -4886,7 +5433,7 @@ local function big(c)
end
end
--- function unicode.utf8_to_utf16(str,littleendian)
+-- function utf.utf8_to_utf16(str,littleendian)
-- if littleendian then
-- return char(255,254) .. utfgsub(str,".",little)
-- else
@@ -4897,7 +5444,7 @@ end
local _, l_remap = utf.remapper(little)
local _, b_remap = utf.remapper(big)
-function unicode.utf8_to_utf16(str,littleendian)
+function utf.utf8_to_utf16(str,littleendian)
if littleendian then
return char(255,254) .. lpegmatch(l_remap,str)
else
@@ -4905,31 +5452,71 @@ function unicode.utf8_to_utf16(str,littleendian)
end
end
-function unicode.utfcodes(str)
- local t, n = { }, 0
- for u in utfvalues(str) do
- n = n + 1
- t[n] = format("0x%04X",u)
- end
- return concat(t,separator or " ")
+-- function utf.tocodes(str,separator) -- can be sped up with an lpeg
+-- local t, n = { }, 0
+-- for u in utfvalues(str) do
+-- n = n + 1
+-- t[n] = format("0x%04X",u)
+-- end
+-- return concat(t,separator or " ")
+-- end
+
+local pattern = Cs (
+ (p_utf8byte / function(unicode ) return format( "0x%04X", unicode) end) *
+ (p_utf8byte * Carg(1) / function(unicode,separator) return format("%s0x%04X",separator,unicode) end)^0
+)
+
+function utf.tocodes(str,separator)
+ return lpegmatch(pattern,str,1,separator or " ")
end
-function unicode.ustring(s)
+function utf.ustring(s)
return format("U+%05X",type(s) == "number" and s or utfbyte(s))
end
-function unicode.xstring(s)
+function utf.xstring(s)
return format("0x%05X",type(s) == "number" and s or utfbyte(s))
end
--
-local pattern = Ct(C(patterns.utf8char)^0)
+local p_nany = p_utf8char / ""
+
+if utfgmatch then
+
+ function utf.count(str,what)
+ if type(what) == "string" then
+ local n = 0
+ for _ in utfgmatch(str,what) do
+ n = n + 1
+ end
+ return n
+ else -- 4 times slower but still faster than / function
+ return #lpegmatch(Cs((P(what)/" " + p_nany)^0),str)
+ end
+ end
+
+else
+
+ local cache = { }
+
+ function utf.count(str,what)
+ if type(what) == "string" then
+ local p = cache[what]
+ if not p then
+ p = Cs((P(what)/" " + p_nany)^0)
+ cache[p] = p
+ end
+ return #lpegmatch(p,str)
+ else -- 4 times slower but still faster than / function
+ return #lpegmatch(Cs((P(what)/" " + p_nany)^0),str)
+ end
+ end
-function utf.totable(str)
- return lpegmatch(pattern,str)
end
+-- maybe also register as string.utf*
+
end -- of closure
@@ -4990,24 +5577,10 @@ local tables = utilities.tables
local format, gmatch, rep, gsub = string.format, string.gmatch, string.rep, string.gsub
local concat, insert, remove = table.concat, table.insert, table.remove
local setmetatable, getmetatable, tonumber, tostring = setmetatable, getmetatable, tonumber, tostring
-local type, next, rawset, tonumber, loadstring = type, next, rawset, tonumber, loadstring
+local type, next, rawset, tonumber, load, select = type, next, rawset, tonumber, load, select
local lpegmatch, P, Cs = lpeg.match, lpeg.P, lpeg.Cs
local serialize = table.serialize
--- function tables.definetable(target) -- defines undefined tables
--- local composed, t, n = nil, { }, 0
--- for name in gmatch(target,"([^%.]+)") do
--- n = n + 1
--- if composed then
--- composed = composed .. "." .. name
--- else
--- composed = name
--- end
--- t[n] = format("%s = %s or { }",composed,composed)
--- end
--- return concat(t,"\n")
--- end
-
local splitter = lpeg.tsplitat(".")
function tables.definetable(target,nofirst,nolast) -- defines undefined tables
@@ -5036,13 +5609,13 @@ end
-- local t = tables.definedtable("a","b","c","d")
function tables.definedtable(...)
- local l = { ... }
local t = _G
- for i=1,#l do
- local tl = t[l[i]]
+ for i=1,select("#",...) do
+ local li = select(i,...)
+ local tl = t[li]
if not tl then
tl = { }
- t[l[i]] = tl
+ t[li] = tl
end
t = tl
end
@@ -5235,7 +5808,7 @@ function table.deserialize(str)
if not str or str == "" then
return
end
- local code = loadstring(str)
+ local code = load(str)
if not code then
return
end
@@ -5252,7 +5825,7 @@ function table.load(filename)
if filename then
local t = io.loaddata(filename)
if t and t ~= "" then
- t = loadstring(t)
+ t = load(t)
if type(t) == "function" then
t = t()
if type(t) == "table" then
@@ -5331,9 +5904,11 @@ utilities = utilities or { }
utilities.storage = utilities.storage or { }
local storage = utilities.storage
+local report = texio and texio.write_nl or print
+
function storage.mark(t)
if not t then
- texio.write_nl("fatal error: storage cannot be marked")
+ report("fatal error: storage cannot be marked")
return -- os.exit()
end
local m = getmetatable(t)
@@ -5363,12 +5938,36 @@ end
function storage.checked(t)
if not t then
- texio.write_nl("fatal error: storage has not been allocated")
+ report("fatal error: storage has not been allocated")
return -- os.exit()
end
return t
end
+-- function utilities.storage.delay(parent,name,filename)
+-- local m = getmetatable(parent)
+-- m.__list[name] = filename
+-- end
+--
+-- function utilities.storage.predefine(parent)
+-- local list = { }
+-- local m = getmetatable(parent) or {
+-- __list = list,
+-- __index = function(t,k)
+-- local l = require(list[k])
+-- t[k] = l
+-- return l
+-- end
+-- }
+-- setmetatable(parent,m)
+-- end
+--
+-- bla = { }
+-- utilities.storage.predefine(bla)
+-- utilities.storage.delay(bla,"test","oepsoeps")
+-- local t = bla.test
+-- table.print(t)
+-- print(t.a)
function storage.setinitializer(data,initialize)
local m = getmetatable(data) or { }
@@ -5393,12 +5992,14 @@ end
-- table namespace ?
-local function f_empty () return "" end -- t,k
-local function f_self (t,k) t[k] = k return k end
-local function f_ignore() end -- t,k,v
+local function f_empty () return "" end -- t,k
+local function f_self (t,k) t[k] = k return k end
+local function f_table (t,k) local v = { } t[k] = v return v end
+local function f_ignore() end -- t,k,v
local t_empty = { __index = f_empty }
local t_self = { __index = f_self }
+local t_table = { __index = f_table }
local t_ignore = { __newindex = f_ignore }
function table.setmetatableindex(t,f)
@@ -5408,6 +6009,8 @@ function table.setmetatableindex(t,f)
m.__index = f_empty
elseif f == "key" then
m.__index = f_self
+ elseif f == "table" then
+ m.__index = f_table
else
m.__index = f
end
@@ -5416,6 +6019,8 @@ function table.setmetatableindex(t,f)
setmetatable(t, t_empty)
elseif f == "key" then
setmetatable(t, t_self)
+ elseif f == "table" then
+ setmetatable(t, t_table)
else
setmetatable(t,{ __index = f })
end
@@ -5626,7 +6231,7 @@ if not modules then modules = { } end modules ['util-lua'] = {
}
local rep, sub, byte, dump, format = string.rep, string.sub, string.byte, string.dump, string.format
-local loadstring, loadfile, type = loadstring, loadfile, type
+local load, loadfile, type = load, loadfile, type
utilities = utilities or {}
utilities.lua = utilities.lua or { }
@@ -5643,11 +6248,23 @@ luautilities.nofstrippedbytes = 0
local strippedchunks = { } -- allocate()
luautilities.strippedchunks = strippedchunks
+luautilities.suffixes = {
+ tma = "tma",
+ tmc = jit and "tmb" or "tmc",
+ lua = "lua",
+ luc = jit and "lub" or "luc",
+ lui = "lui",
+ luv = "luv",
+ luj = "luj",
+ tua = "tua",
+ tuc = "tuc",
+}
+
local function fatalerror(name)
utilities.report(format("fatal error in %q",name or "unknown"))
end
-if jit then
+if jit or status.luatex_version >= 74 then
local function register(name)
if tracestripping then
@@ -5660,7 +6277,7 @@ if jit then
local function stupidcompile(luafile,lucfile,strip)
local code = io.loaddata(luafile)
if code and code ~= "" then
- code = loadstring(code)
+ code = load(code)
if code then
code = dump(code,strip and luautilities.stripcode or luautilities.alwaysstripcode)
if code and code ~= "" then
@@ -5692,13 +6309,13 @@ if jit then
end
if forcestrip or luautilities.alwaysstripcode then
register(name)
- return loadstring(dump(code,true)), 0
+ return load(dump(code,true)), 0
else
return code, 0
end
elseif luautilities.alwaysstripcode then
register(name)
- return loadstring(dump(code,true)), 0
+ return load(dump(code,true)), 0
else
return code, 0
end
@@ -5706,14 +6323,14 @@ if jit then
function luautilities.strippedloadstring(code,forcestrip,name) -- not executed
if forcestrip and luautilities.stripcode or luautilities.alwaysstripcode then
- code = loadstring(code)
+ code = load(code)
if not code then
fatalerror(name)
end
register(name)
code = dump(code,true)
end
- return loadstring(code), 0
+ return load(code), 0
end
function luautilities.compile(luafile,lucfile,cleanup,strip,fallback) -- defaults: cleanup=false strip=true
@@ -5754,67 +6371,79 @@ else
return delta
end
- local function strip_code_pc(dump,name)
- local before = #dump
- local version, format, endian, int, size, ins, num = byte(dump,5,11)
- local subint
- if endian == 1 then
- subint = function(dump, i, l)
- local val = 0
- for n = l, 1, -1 do
- val = val * 256 + byte(dump,i + n - 1)
+ local strip_code_pc
+
+ if _MAJORVERSION == 5 and _MINORVERSION == 1 then
+
+ strip_code_pc = function(dump,name)
+ local before = #dump
+ local version, format, endian, int, size, ins, num = byte(dump,5,11)
+ local subint
+ if endian == 1 then
+ subint = function(dump, i, l)
+ local val = 0
+ for n = l, 1, -1 do
+ val = val * 256 + byte(dump,i + n - 1)
+ end
+ return val, i + l
+ end
+ else
+ subint = function(dump, i, l)
+ local val = 0
+ for n = 1, l, 1 do
+ val = val * 256 + byte(dump,i + n - 1)
+ end
+ return val, i + l
end
- return val, i + l
end
- else
- subint = function(dump, i, l)
- local val = 0
- for n = 1, l, 1 do
- val = val * 256 + byte(dump,i + n - 1)
+ local strip_function
+ strip_function = function(dump)
+ local count, offset = subint(dump, 1, size)
+ local stripped, dirty = rep("\0", size), offset + count
+ offset = offset + count + int * 2 + 4
+ offset = offset + int + subint(dump, offset, int) * ins
+ count, offset = subint(dump, offset, int)
+ for n = 1, count do
+ local t
+ t, offset = subint(dump, offset, 1)
+ if t == 1 then
+ offset = offset + 1
+ elseif t == 4 then
+ offset = offset + size + subint(dump, offset, size)
+ elseif t == 3 then
+ offset = offset + num
+ end
end
- return val, i + l
- end
- end
- local strip_function
- strip_function = function(dump)
- local count, offset = subint(dump, 1, size)
- local stripped, dirty = rep("\0", size), offset + count
- offset = offset + count + int * 2 + 4
- offset = offset + int + subint(dump, offset, int) * ins
- count, offset = subint(dump, offset, int)
- for n = 1, count do
- local t
- t, offset = subint(dump, offset, 1)
- if t == 1 then
- offset = offset + 1
- elseif t == 4 then
- offset = offset + size + subint(dump, offset, size)
- elseif t == 3 then
- offset = offset + num
+ count, offset = subint(dump, offset, int)
+ stripped = stripped .. sub(dump,dirty, offset - 1)
+ for n = 1, count do
+ local proto, off = strip_function(sub(dump,offset, -1))
+ stripped, offset = stripped .. proto, offset + off - 1
end
+ offset = offset + subint(dump, offset, int) * int + int
+ count, offset = subint(dump, offset, int)
+ for n = 1, count do
+ offset = offset + subint(dump, offset, size) + size + int * 2
+ end
+ count, offset = subint(dump, offset, int)
+ for n = 1, count do
+ offset = offset + subint(dump, offset, size) + size
+ end
+ stripped = stripped .. rep("\0", int * 3)
+ return stripped, offset
end
- count, offset = subint(dump, offset, int)
- stripped = stripped .. sub(dump,dirty, offset - 1)
- for n = 1, count do
- local proto, off = strip_function(sub(dump,offset, -1))
- stripped, offset = stripped .. proto, offset + off - 1
- end
- offset = offset + subint(dump, offset, int) * int + int
- count, offset = subint(dump, offset, int)
- for n = 1, count do
- offset = offset + subint(dump, offset, size) + size + int * 2
- end
- count, offset = subint(dump, offset, int)
- for n = 1, count do
- offset = offset + subint(dump, offset, size) + size
- end
- stripped = stripped .. rep("\0", int * 3)
- return stripped, offset
+ dump = sub(dump,1,12) .. strip_function(sub(dump,13,-1))
+ local after = #dump
+ local delta = register(name,before,after)
+ return dump, delta
+ end
+
+ else
+
+ strip_code_pc = function(dump,name)
+ return dump, 0
end
- dump = sub(dump,1,12) .. strip_function(sub(dump,13,-1))
- local after = #dump
- local delta = register(name,before,after)
- return dump, delta
+
end
-- ... end of borrowed code.
@@ -5834,14 +6463,14 @@ else
end
if forcestrip then
local code, n = strip_code_pc(dump(code),name)
- return loadstring(code), n
+ return load(code), n
elseif luautilities.alwaysstripcode then
- return loadstring(strip_code_pc(dump(code),name))
+ return load(strip_code_pc(dump(code),name))
else
return code, 0
end
elseif luautilities.alwaysstripcode then
- return loadstring(strip_code_pc(dump(code),name))
+ return load(strip_code_pc(dump(code),name))
else
return code, 0
end
@@ -5850,20 +6479,20 @@ else
function luautilities.strippedloadstring(code,forcestrip,name) -- not executed
local n = 0
if (forcestrip and luautilities.stripcode) or luautilities.alwaysstripcode then
- code = loadstring(code)
+ code = load(code)
if not code then
fatalerror(name)
end
code, n = strip_code_pc(dump(code),name)
end
- return loadstring(code), n
+ return load(code), n
end
local function stupidcompile(luafile,lucfile,strip)
local code = io.loaddata(luafile)
local n = 0
if code and code ~= "" then
- code = loadstring(code)
+ code = load(code)
if not code then
fatalerror()
end
@@ -5903,6 +6532,7 @@ else
utilities.report("lua: %s dumped into %s (unstripped)",luafile,lucfile)
end
cleanup = false -- better see how bad it is
+ done = true -- hm
end
if done and cleanup == true and lfs.isfile(lucfile) and lfs.isfile(luafile) then
utilities.report("lua: removing %s",luafile)
@@ -5949,7 +6579,6 @@ if not modules then modules = { } end modules ['util-prs'] = {
}
local lpeg, table, string = lpeg, table, string
-
local P, R, V, S, C, Ct, Cs, Carg, Cc, Cg, Cf, Cp = lpeg.P, lpeg.R, lpeg.V, lpeg.S, lpeg.C, lpeg.Ct, lpeg.Cs, lpeg.Carg, lpeg.Cc, lpeg.Cg, lpeg.Cf, lpeg.Cp
local lpegmatch, patterns = lpeg.match, lpeg.patterns
local concat, format, gmatch, find = table.concat, string.format, string.gmatch, string.find
@@ -6367,6 +6996,45 @@ function parsers.rfc4180splitter(specification)
end
end
+-- utilities.parsers.stepper("1,7-",9,function(i) print(">>>",i) end)
+-- utilities.parsers.stepper("1-3,7,8,9")
+-- utilities.parsers.stepper("1-3,6,7",function(i) print(">>>",i) end)
+-- utilities.parsers.stepper(" 1 : 3, ,7 ")
+-- utilities.parsers.stepper("1:4,9:13,24:*",30)
+
+local function ranger(first,last,n,action)
+ if not first then
+ -- forget about it
+ elseif last == true then
+ for i=first,n or first do
+ action(i)
+ end
+ elseif last then
+ for i=first,last do
+ action(i)
+ end
+ else
+ action(first)
+ end
+end
+
+local cardinal = patterns.cardinal / tonumber
+local spacers = patterns.spacer^0
+local endofstring = patterns.endofstring
+
+local stepper = spacers * ( C(cardinal) * ( spacers * S(":-") * spacers * ( C(cardinal) + Cc(true) ) + Cc(false) )
+ * Carg(1) * Carg(2) / ranger * S(", ")^0 )^1
+
+local stepper = spacers * ( C(cardinal) * ( spacers * S(":-") * spacers * ( C(cardinal) + (P("*") + endofstring) * Cc(true) ) + Cc(false) )
+ * Carg(1) * Carg(2) / ranger * S(", ")^0 )^1 * endofstring -- we're sort of strict (could do without endofstring)
+
+function utilities.parsers.stepper(str,n,action)
+ if type(n) == "function" then
+ lpegmatch(stepper,str,1,false,n or print)
+ else
+ lpegmatch(stepper,str,1,n,action or print)
+ end
+end
end -- of closure
@@ -6817,7 +7485,7 @@ if not modules then modules = { } end modules ['trac-set'] = { -- might become u
local type, next, tostring = type, next, tostring
local concat = table.concat
-local format, find, lower, gsub, escapedpattern = string.format, string.find, string.lower, string.gsub, string.escapedpattern
+local format, find, lower, gsub, topattern = string.format, string.find, string.lower, string.gsub, string.topattern
local is_boolean = string.is_boolean
local settings_to_hash = utilities.parsers.settings_to_hash
local allocate = utilities.storage.allocate
@@ -6905,7 +7573,7 @@ local function set(t,what,newvalue)
else
value = is_boolean(value,value)
end
- w = "^" .. escapedpattern(w,true) .. "$" -- new: anchored
+ w = topattern(w,true,true)
for name, functions in next, data do
if done[name] then
-- prevent recursion due to wildcards
@@ -6959,7 +7627,8 @@ function setters.register(t,what,...)
end
end
local default = functions.default -- can be set from cnf file
- for _, fnc in next, { ... } do
+ for i=1,select("#",...) do
+ local fnc = select(i,...)
local typ = type(fnc)
if typ == "string" then
if trace_initialize then
@@ -7028,7 +7697,7 @@ function setters.show(t)
local value, default, modules = functions.value, functions.default, #functions
value = value == nil and "unset" or tostring(value)
default = default == nil and "unset" or tostring(default)
- t.report("%-50s modules: %2i default: %6s value: %6s",name,modules,default,value)
+ t.report("%-50s modules: %2i default: %-12s value: %-12s",name,modules,default,value)
end
end
t.report()
@@ -7052,17 +7721,29 @@ local function report(setter,...)
end
end
-function setters.new(name)
+local function default(setter,name)
+ local d = setter.data[name]
+ return d and d.default
+end
+
+local function value(setter,name)
+ local d = setter.data[name]
+ return d and (d.value or d.default)
+end
+
+function setters.new(name) -- we could use foo:bar syntax (but not used that often)
local setter -- we need to access it in setter itself
setter = {
data = allocate(), -- indexed, but also default and value fields
name = name,
- report = function(...) report (setter,...) end,
- enable = function(...) enable (setter,...) end,
- disable = function(...) disable (setter,...) end,
- register = function(...) register(setter,...) end,
- list = function(...) list (setter,...) end,
- show = function(...) show (setter,...) end,
+ report = function(...) report (setter,...) end,
+ enable = function(...) enable (setter,...) end,
+ disable = function(...) disable (setter,...) end,
+ register = function(...) register(setter,...) end,
+ list = function(...) list (setter,...) end,
+ show = function(...) show (setter,...) end,
+ default = function(...) return default (setter,...) end,
+ value = function(...) return value (setter,...) end,
}
data[name] = setter
return setter
@@ -7189,9 +7870,9 @@ if not modules then modules = { } end modules ['trac-log'] = {
local write_nl, write = texio and texio.write_nl or print, texio and texio.write or io.write
local format, gmatch, find = string.format, string.gmatch, string.find
local concat, insert, remove = table.concat, table.insert, table.remove
-local escapedpattern = string.escapedpattern
+local topattern = string.topattern
local texcount = tex and tex.count
-local next, type = next, type
+local next, type, select = next, type, select
local setmetatableindex = table.setmetatableindex
@@ -7502,7 +8183,7 @@ local function setblocked(category,value)
if data[c] then
v.state = value
else
- c = escapedpattern(c,true)
+ c = topattern(c,true,true)
for k, v in next, data do
if find(k,c) then
v.state = value
@@ -7720,10 +8401,10 @@ local function reporthelp(t,...)
if type(helpinfo) == "string" then
reportlines(t,helpinfo)
elseif type(helpinfo) == "table" then
- local tags = { ... }
- for i=1,#tags do
- reportlines(t,t.helpinfo[tags[i]])
- if i < #tags then
+ local n = select("#",...)
+ for i=1,n do
+ reportlines(t,t.helpinfo[select(i,...)])
+ if i < n then
t.report()
end
end
@@ -8013,6 +8694,10 @@ local format, sub, match, gsub, find = string.format, string.sub, string.match,
local unquoted, quoted = string.unquoted, string.quoted
local concat, insert, remove = table.concat, table.insert, table.remove
local loadedluacode = utilities.lua.loadedluacode
+local luasuffixes = utilities.lua.suffixes
+
+environment = environment or { }
+local environment = environment
-- precautions
@@ -8022,9 +8707,29 @@ function os.setlocale()
-- no way you can mess with it
end
--- dirty tricks
+-- dirty tricks (we will replace the texlua call by luatex --luaonly)
-if arg and (arg[0] == 'luatex' or arg[0] == 'luatex.exe') and arg[1] == "--luaonly" then
+local validengines = allocate {
+ ["luatex"] = true,
+ ["luajittex"] = true,
+ -- ["luatex.exe"] = true,
+ -- ["luajittex.exe"] = true,
+}
+
+local basicengines = allocate {
+ ["luatex"] = "luatex",
+ ["texlua"] = "luatex",
+ ["texluac"] = "luatex",
+ ["luajittex"] = "luajittex",
+ ["texluajit"] = "luajittex",
+ -- ["texlua.exe"] = "luatex",
+ -- ["texluajit.exe"] = "luajittex",
+}
+
+environment.validengines = validengines
+environment.basicengines = basicengines
+
+if arg and validengines[file.removesuffix(arg[0])] and arg[1] == "--luaonly" then
arg[-1] = arg[0]
arg[ 0] = arg[2]
for k=3,#arg do
@@ -8056,9 +8761,6 @@ end
-- environment
-environment = environment or { }
-local environment = environment
-
environment.arguments = allocate()
environment.files = allocate()
environment.sortedflags = nil
@@ -8114,7 +8816,7 @@ function environment.initializearguments(arg)
end
end
end
- environment.ownname = environment.ownname or arg[0] or 'unknown.lua'
+ environment.ownname = file.reslash(environment.ownname or arg[0] or 'unknown.lua')
end
function environment.setargument(name,value)
@@ -8195,6 +8897,22 @@ function environment.reconstructcommandline(arg,noquote)
end
end
+-- -- to be tested:
+--
+-- function environment.reconstructcommandline(arg,noquote)
+-- arg = arg or environment.originalarguments
+-- if noquote and #arg == 1 then
+-- return unquoted(resolvers.resolve(arg[1]))
+-- elseif #arg > 0 then
+-- local result = { }
+-- for i=1,#arg do
+-- result[#result+1] = format("%q",unquoted(resolvers.resolve(arg[i]))) -- always quote
+-- end
+-- return concat(result," ")
+-- else
+-- return ""
+-- end
+-- end
if arg then
@@ -8289,9 +9007,11 @@ function environment.loadluafile(filename, version)
local lucname, luaname, chunk
local basename = file.removesuffix(filename)
if basename == filename then
- lucname, luaname = basename .. ".luc", basename .. ".lua"
+ luaname = fiule.addsuffix(basename,luasuffixes.lua)
+ lucname = fiule.addsuffix(basename,luasuffixes.luc)
else
- lucname, luaname = nil, basename -- forced suffix
+ luaname = basename -- forced suffix
+ lucname = nil
end
-- when not overloaded by explicit suffix we look for a luc file first
local fullname = (lucname and environment.luafile(lucname)) or ""
@@ -8372,7 +9092,6 @@ xml = xml or { }
local xml = xml
-local utf = unicode.utf8
local concat, remove, insert = table.concat, table.remove, table.insert
local type, next, setmetatable, getmetatable, tonumber = type, next, setmetatable, getmetatable, tonumber
local format, lower, find, match, gsub = string.format, string.lower, string.find, string.match, string.gsub
@@ -9583,7 +10302,7 @@ if not modules then modules = { } end modules ['lxml-lpt'] = {
-- todo: B/C/[get first match]
local concat, remove, insert = table.concat, table.remove, table.insert
-local type, next, tonumber, tostring, setmetatable, loadstring = type, next, tonumber, tostring, setmetatable, loadstring
+local type, next, tonumber, tostring, setmetatable, load, select = type, next, tonumber, tostring, setmetatable, load, select
local format, upper, lower, gmatch, gsub, find, rep = string.format, string.upper, string.lower, string.gmatch, string.gsub, string.find, string.rep
local lpegmatch, lpegpatterns = lpeg.match, lpeg.patterns
@@ -10195,7 +10914,7 @@ end
local function register_expression(expression)
local converted = lpegmatch(converter,expression)
- local runner = loadstring(format(template_e,converted))
+ local runner = load(format(template_e,converted))
runner = (runner and runner()) or function() errorrunner_e(expression,converted) end
return { kind = "expression", expression = expression, converted = converted, evaluator = runner }
end
@@ -10203,9 +10922,9 @@ end
local function register_finalizer(protocol,name,arguments)
local runner
if arguments and arguments ~= "" then
- runner = loadstring(format(template_f_y,protocol or xml.defaultprotocol,name,arguments))
+ runner = load(format(template_f_y,protocol or xml.defaultprotocol,name,arguments))
else
- runner = loadstring(format(template_f_n,protocol or xml.defaultprotocol,name))
+ runner = load(format(template_f_n,protocol or xml.defaultprotocol,name))
end
runner = (runner and runner()) or function() errorrunner_f(name,arguments) end
return { kind = "finalizer", name = name, arguments = arguments, finalizer = runner }
@@ -10597,6 +11316,7 @@ end
expressions.child = function(e,pattern)
return applylpath(e,pattern) -- todo: cache
end
+
expressions.count = function(e,pattern) -- what if pattern == empty or nil
local collected = applylpath(e,pattern) -- todo: cache
return pattern and (collected and #collected) or 0
@@ -10604,13 +11324,30 @@ end
-- external
-expressions.oneof = function(s,...) -- slow
- local t = {...} for i=1,#t do if s == t[i] then return true end end return false
+-- expressions.oneof = function(s,...)
+-- local t = {...}
+-- for i=1,#t do
+-- if s == t[i] then
+-- return true
+-- end
+-- end
+-- return false
+-- end
+
+expressions.oneof = function(s,...)
+ for i=1,select("#",...) do
+ if s == select(i,...) then
+ return true
+ end
+ end
+ return false
end
+
expressions.error = function(str)
xml.errorhandler(format("unknown function in lpath expression: %s",tostring(str or "?")))
return false
end
+
expressions.undefined = function(s)
return s == nil
end
@@ -12220,7 +12957,6 @@ if not modules then modules = { } end modules ['data-ini'] = {
}
local gsub, find, gmatch, char = string.gsub, string.find, string.gmatch, string.char
-local concat = table.concat
local next, type = next, type
local filedirname, filebasename, filejoin = file.dirname, file.basename, file.join
@@ -12311,6 +13047,10 @@ do
local args = environment.originalarguments or arg -- this needs a cleanup
+ if not environment.ownmain then
+ environment.ownmain = status and string.match(string.lower(status.banner),"this is ([%a]+)") or "luatex"
+ end
+
local ownbin = environment.ownbin or args[-2] or arg[-2] or args[-1] or arg[-1] or arg[0] or "luatex"
local ownpath = environment.ownpath or os.selfdir
@@ -12427,19 +13167,6 @@ end
environment.texroot = file.collapsepath(texroot)
--- Tracing. Todo ...
-
-function resolvers.settrace(n) -- no longer number but: 'locating' or 'detail'
- if n then
- trackers.disable("resolvers.*")
- trackers.enable("resolvers."..n)
- end
-end
-
-resolvers.settrace(osgetenv("MTX_INPUT_TRACE"))
-
--- todo:
-
if profiler then
directives.register("system.profile",function()
profiler.start("luatex-profile.log")
@@ -12946,6 +13673,8 @@ resolvers.suffixes = suffixes
resolvers.dangerous = dangerous
resolvers.suffixmap = suffixmap
+local luasuffixes = utilities.lua.suffixes
+
local relations = allocate { -- todo: handlers also here
core = {
ofm = { -- will become obsolete
@@ -13031,7 +13760,7 @@ local relations = allocate { -- todo: handlers also here
lua = {
names = { "lua" },
variable = 'LUAINPUTS',
- suffixes = { 'lua', 'luc', 'tma', 'tmc' },
+ suffixes = { luasuffixes.lua, luasuffixes.luc, luasuffixes.tma, luasuffixes.tmc },
},
lib = {
names = { "lib" },
@@ -13227,6 +13956,7 @@ if not modules then modules = { } end modules ['data-tmp'] = {
local format, lower, gsub, concat = string.format, string.lower, string.gsub, table.concat
local serialize, serializetofile = table.serialize, table.tofile
local mkdirs, isdir = dir.mkdirs, lfs.isdir
+local addsuffix, is_writable, is_readable = file.addsuffix, file.is_writable, file.is_readable
local trace_locating = false trackers.register("resolvers.locating", function(v) trace_locating = v end)
local trace_cache = false trackers.register("resolvers.cache", function(v) trace_cache = v end)
@@ -13251,8 +13981,10 @@ end
-- end of intermezzo
-caches = caches or { }
-local caches = caches
+caches = caches or { }
+local caches = caches
+
+local luasuffixes = utilities.lua.suffixes
caches.base = caches.base or "luatex-cache"
caches.more = caches.more or "context"
@@ -13280,18 +14012,18 @@ local function identify()
cachepath = file.collapsepath(cachepath)
local valid = isdir(cachepath)
if valid then
- if file.is_readable(cachepath) then
+ if is_readable(cachepath) then
readables[#readables+1] = cachepath
- if not writable and file.is_writable(cachepath) then
+ if not writable and is_writable(cachepath) then
writable = cachepath
end
end
elseif not writable and caches.force then
local cacheparent = file.dirname(cachepath)
- if file.is_writable(cacheparent) and true then -- we go on anyway (needed for mojca's kind of paths)
+ if is_writable(cacheparent) and true then -- we go on anyway (needed for mojca's kind of paths)
if not caches.ask or io.ask(format("\nShould I create the cache path %s?",cachepath), "no", { "yes", "no" }) == "yes" then
mkdirs(cachepath)
- if isdir(cachepath) and file.is_writable(cachepath) then
+ if isdir(cachepath) and is_writable(cachepath) then
report_caches("created: %s",cachepath)
writable = cachepath
readables[#readables+1] = cachepath
@@ -13313,8 +14045,8 @@ local function identify()
cachepath = resolvers.resolve(cachepath)
cachepath = resolvers.cleanpath(cachepath)
local valid = isdir(cachepath)
- if valid and file.is_readable(cachepath) then
- if not writable and file.is_writable(cachepath) then
+ if valid and is_readable(cachepath) then
+ if not writable and is_writable(cachepath) then
readables[#readables+1] = cachepath
writable = cachepath
break
@@ -13403,7 +14135,7 @@ end
local r_cache, w_cache = { }, { } -- normally w in in r but who cares
-local function getreadablepaths(...) -- we can optimize this as we have at most 2 tags
+local function getreadablepaths(...)
local tags = { ... }
local hash = concat(tags,"/")
local done = r_cache[hash]
@@ -13446,7 +14178,7 @@ function caches.getfirstreadablefile(filename,...)
for i=1,#rd do
local path = rd[i]
local fullname = file.join(path,filename)
- if file.is_readable(fullname) then
+ if is_readable(fullname) then
usedreadables[i] = true
return fullname, path
end
@@ -13467,7 +14199,7 @@ function caches.define(category,subcategory) -- for old times sake
end
function caches.setluanames(path,name)
- return path .. "/" .. name .. ".tma", path .. "/" .. name .. ".tmc"
+ return format("%s/%s.%s",path,name,luasuffixes.tma), format("%s/%s.%s",path,name,luasuffixes.tmc)
end
function caches.loaddata(readables,name)
@@ -13477,7 +14209,13 @@ function caches.loaddata(readables,name)
for i=1,#readables do
local path = readables[i]
local tmaname, tmcname = caches.setluanames(path,name)
- local loader = loadfile(tmcname) or loadfile(tmaname)
+ local loader = loadfile(tmcname)
+ if not loader then
+ -- in case we have a different engine
+ utilities.lua.compile(tmaname,tmcname)
+ --
+ loader = loadfile(tmaname)
+ end
if loader then
loader = loader()
collectgarbage("step")
@@ -13489,11 +14227,15 @@ end
function caches.is_writable(filepath,filename)
local tmaname, tmcname = caches.setluanames(filepath,filename)
- return file.is_writable(tmaname)
+ return is_writable(tmaname)
end
local saveoptions = { compact = true }
+-- add some point we will only use the internal bytecode compiler and
+-- then we can flag success in the tma so that it can trigger a compile
+-- if the other engine
+
function caches.savedata(filepath,filename,data,raw)
local tmaname, tmcname = caches.setluanames(filepath,filename)
local reduce, simplify = true, true
@@ -13519,9 +14261,9 @@ end
function caches.loadcontent(cachename,dataname)
local name = caches.hashed(cachename)
- local full, path = caches.getfirstreadablefile(name ..".lua","trees")
+ local full, path = caches.getfirstreadablefile(addsuffix(name,luasuffixes.lua),"trees")
local filename = file.join(path,name)
- local blob = loadfile(filename .. ".luc") or loadfile(filename .. ".lua")
+ local blob = loadfile(addsuffix(filename,luasuffixes.luc)) or loadfile(addsuffix(filename,luasuffixes.lua))
if blob then
local data = blob()
if data and data.content then
@@ -13556,9 +14298,10 @@ end
function caches.savecontent(cachename,dataname,content)
local name = caches.hashed(cachename)
- local full, path = caches.setfirstwritablefile(name ..".lua","trees")
+ local full, path = caches.setfirstwritablefile(addsuffix(name,luasuffixes.lua),"trees")
local filename = file.join(path,name) -- is full
- local luaname, lucname = filename .. ".lua", filename .. ".luc"
+ local luaname = addsuffix(filename,luasuffixes.lua)
+ local lucname = addsuffix(filename,luasuffixes.luc)
if trace_locating then
report_resolvers("preparing '%s' for '%s'",dataname,cachename)
end
@@ -13763,6 +14506,7 @@ local joinpath = file.joinpath
local allocate = utilities.storage.allocate
local settings_to_array = utilities.parsers.settings_to_array
local setmetatableindex = table.setmetatableindex
+local luasuffixes = utilities.lua.suffixes
local trace_locating = false trackers.register("resolvers.locating", function(v) trace_locating = v end)
local trace_detail = false trackers.register("resolvers.details", function(v) trace_detail = v end)
@@ -15325,15 +16069,19 @@ function resolvers.dowithvariable(name,func)
end
function resolvers.locateformat(name)
- local barename = file.removesuffix(name) -- gsub(name,"%.%a+$","")
- local fmtname = caches.getfirstreadablefile(barename..".fmt","formats") or ""
+ local engine = environment.ownmain or "luatex"
+ local barename = file.removesuffix(name)
+ local fullname = file.addsuffix(barename,"fmt")
+ local fmtname = caches.getfirstreadablefile(fullname,"formats",engine) or ""
if fmtname == "" then
- fmtname = resolvers.findfile(barename..".fmt")
+ fmtname = resolvers.findfile(fullname)
fmtname = resolvers.cleanpath(fmtname)
end
if fmtname ~= "" then
local barename = file.removesuffix(fmtname)
- local luaname, lucname, luiname = barename .. ".lua", barename .. ".luc", barename .. ".lui"
+ local luaname = file.addsuffix(barename,luasuffixes.lua)
+ local lucname = file.addsuffix(barename,luasuffixes.luc)
+ local luiname = file.addsuffix(barename,luasuffixes.lui)
if lfs.isfile(luiname) then
return barename, luiname
elseif lfs.isfile(lucname) then
@@ -15430,10 +16178,9 @@ local resolvers = resolvers
local prefixes = utilities.storage.allocate()
resolvers.prefixes = prefixes
-local gsub = string.gsub
local cleanpath, findgivenfile, expansion = resolvers.cleanpath, resolvers.findgivenfile, resolvers.expansion
local getenv = resolvers.getenv -- we can probably also use resolvers.expansion
-local P, Cs, lpegmatch = lpeg.P, lpeg.Cs, lpeg.match
+local P, S, R, C, Cs, lpegmatch = lpeg.P, lpeg.S, lpeg.R, lpeg.C, lpeg.Cs, lpeg.match
local joinpath, basename, dirname = file.join, file.basename, file.dirname
local getmetatable, rawset, type = getmetatable, rawset, type
@@ -15555,6 +16302,28 @@ end
-- todo: use an lpeg (see data-lua for !! / stripper)
+-- local function resolve(str) -- use schemes, this one is then for the commandline only
+-- if type(str) == "table" then
+-- local t = { }
+-- for i=1,#str do
+-- t[i] = resolve(str[i])
+-- end
+-- return t
+-- else
+-- local res = resolved[str]
+-- if not res then
+-- res = gsub(str,"([a-z][a-z]+):([^ \"\';,]*)",_resolve_) -- home:xx;selfautoparent:xx; etc (comma added)
+-- resolved[str] = res
+-- abstract[res] = str
+-- end
+-- return res
+-- end
+-- end
+
+-- home:xx;selfautoparent:xx;
+
+local pattern = Cs((C(R("az")^2) * P(":") * C((1-S(" \"\';,"))^1) / _resolve_ + P(1))^0)
+
local function resolve(str) -- use schemes, this one is then for the commandline only
if type(str) == "table" then
local t = { }
@@ -15565,7 +16334,7 @@ local function resolve(str) -- use schemes, this one is then for the commandline
else
local res = resolved[str]
if not res then
- res = gsub(str,"([a-z][a-z]+):([^ \"\';,]*)",_resolve_) -- home:xx;selfautoparent:xx; etc (comma added)
+ res = lpegmatch(pattern,str)
resolved[str] = res
abstract[res] = str
end
@@ -15998,7 +16767,7 @@ statistics.register("used cache path", function() return caches.usedpaths() end
function statistics.savefmtstatus(texname,formatbanner,sourcefile) -- texname == formatname
local enginebanner = status.list().banner
if formatbanner and enginebanner and sourcefile then
- local luvname = file.replacesuffix(texname,"luv")
+ local luvname = file.replacesuffix(texname,"luv") -- utilities.lua.suffixes.luv
local luvdata = {
enginebanner = enginebanner,
formatbanner = formatbanner,
@@ -16009,10 +16778,14 @@ function statistics.savefmtstatus(texname,formatbanner,sourcefile) -- texname ==
end
end
+-- todo: check this at startup and return (say) 999 as signal that the run
+-- was aborted due to a wrong format in which case mtx-context can trigger
+-- a remake
+
function statistics.checkfmtstatus(texname)
local enginebanner = status.list().banner
if enginebanner and texname then
- local luvname = file.replacesuffix(texname,"luv")
+ local luvname = file.replacesuffix(texname,"luv") -- utilities.lua.suffixes.luv
if lfs.isfile(luvname) then
local luv = dofile(luvname)
if luv and luv.sourcefile then
@@ -16389,7 +17162,7 @@ if not modules then modules = { } end modules ['data-sch'] = {
license = "see context related readme files"
}
-local loadstring = loadstring
+local load = load
local gsub, concat, format = string.gsub, table.concat, string.format
local finders, openers, loaders = resolvers.finders, resolvers.openers, resolvers.loaders
@@ -16575,7 +17348,7 @@ schemes.fetchstring = fetchstring
function schemes.fetchtable(url,data)
local reply = fetchstring(url,data)
if reply then
- local s = loadstring("return " .. reply)
+ local s = load("return " .. reply)
if s then
return s()
end
@@ -16603,6 +17376,8 @@ if not modules then modules = { } end modules ['data-lua'] = {
-- -- local mylib = require("libtest")
-- -- local mysql = require("luasql.mysql")
+local searchers = package.searchers or package.loaders
+
local concat = table.concat
local trace_libraries = false
@@ -16704,13 +17479,9 @@ function package.extraclibpath(...)
end
end
-if not package.loaders then
- package.loaders = package.searchers -- 5.2
-end
-
-if not package.loaders[-2] then
+if not searchers[-2] then
-- use package-path and package-cpath
- package.loaders[-2] = package.loaders[2]
+ searchers[-2] = searchers[2]
end
local function loadedaslib(resolved,rawname)
@@ -16721,7 +17492,7 @@ local function loadedbylua(name)
if trace_libraries then
report_libraries("! locating %q using normal loader",name)
end
- local resolved = package.loaders[-2](name)
+ local resolved = searchers[-2](name)
end
local function loadedbyformat(name,rawname,suffixes,islib)
@@ -16776,7 +17547,7 @@ local function notloaded(name)
end
end
-package.loaders[2] = function(name)
+searchers[2] = function(name)
local thename = gsub(name,"%.","/")
local luaname = file.addsuffix(thename,"lua")
local libname = file.addsuffix(thename,os.libsuffix)
@@ -16790,8 +17561,8 @@ package.loaders[2] = function(name)
or notloaded (name)
end
--- package.loaders[3] = nil
--- package.loaders[4] = nil
+-- searchers[3] = nil
+-- searchers[4] = nil
resolvers.loadlualib = require
@@ -17161,13 +17932,11 @@ if not modules then modules = { } end modules ['luat-fmt'] = {
local format = string.format
+local quoted = string.quoted
+local luasuffixes = utilities.lua.suffixes
local report_format = logs.reporter("resolvers","formats")
--- helper for mtxrun
-
-local quoted = string.quoted
-
local function primaryflags() -- not yet ok
local trackers = environment.argument("trackers")
local directives = environment.argument("directives")
@@ -17182,13 +17951,14 @@ local function primaryflags() -- not yet ok
end
function environment.make_format(name)
+ local engine = environment.ownmain or "luatex"
-- change to format path (early as we need expanded paths)
- local olddir = lfs.currentdir()
- local path = caches.getwritablepath("formats") or "" -- maybe platform
+ local olddir = dir.current()
+ local path = caches.getwritablepath("formats",engine) or "" -- maybe platform
if path ~= "" then
lfs.chdir(path)
end
- report_format("format path: %s",lfs.currentdir())
+ report_format("format path: %s",dir.current())
-- check source file
local texsourcename = file.addsuffix(name,"mkiv")
local fulltexsourcename = resolvers.findfile(texsourcename,"tex") or ""
@@ -17225,13 +17995,12 @@ function environment.make_format(name)
elseif type(usedlualibs) == "table" then
report_format("using stub specification: %s",fullspecificationname)
local texbasename = file.basename(name)
- local luastubname = file.addsuffix(texbasename,"lua")
- local lucstubname = file.addsuffix(texbasename,"luc")
+ local luastubname = file.addsuffix(texbasename,luasuffixes.lua)
+ local lucstubname = file.addsuffix(texbasename,luasuffixes.luc)
-- pack libraries in stub
report_format("creating initialization file: %s",luastubname)
utilities.merger.selfcreate(usedlualibs,specificationpath,luastubname)
-- compile stub file (does not save that much as we don't use this stub at startup any more)
- local strip = resolvers.booleanvariable("LUACSTRIP", true)
if utilities.lua.compile(luastubname,lucstubname) and lfs.isfile(lucstubname) then
report_format("using compiled initialization file: %s",lucstubname)
usedluastub = lucstubname
@@ -17245,7 +18014,7 @@ function environment.make_format(name)
return
end
-- generate format
- local command = format("luatex --ini %s --lua=%s %s %sdump",primaryflags(),quoted(usedluastub),quoted(fulltexsourcename),os.platform == "unix" and "\\\\" or "\\")
+ local command = format("%s --ini %s --lua=%s %s %sdump",engine,primaryflags(),quoted(usedluastub),quoted(fulltexsourcename),os.platform == "unix" and "\\\\" or "\\")
report_format("running command: %s\n",command)
os.spawn(command)
-- remove related mem files
@@ -17264,8 +18033,9 @@ end
function environment.run_format(name,data,more)
if name and name ~= "" then
+ local engine = environment.ownmain or "luatex"
local barename = file.removesuffix(name)
- local fmtname = caches.getfirstreadablefile(file.addsuffix(barename,"fmt"),"formats")
+ local fmtname = caches.getfirstreadablefile(file.addsuffix(barename,"fmt"),"formats",engine)
if fmtname == "" then
fmtname = resolvers.findfile(file.addsuffix(barename,"fmt")) or ""
end
@@ -17282,7 +18052,7 @@ function environment.run_format(name,data,more)
report_format("using format name: %s",fmtname)
report_format("no luc/lua with name: %s",barename)
else
- local command = format("luatex %s --fmt=%s --lua=%s %s %s",primaryflags(),quoted(barename),quoted(luaname),quoted(data),more ~= "" and quoted(more) or "")
+ local command = format("%s %s --fmt=%s --lua=%s %s %s",engine,primaryflags(),quoted(barename),quoted(luaname),quoted(data),more ~= "" and quoted(more) or "")
report_format("running command: %s",command)
os.spawn(command)
end
@@ -17423,10 +18193,11 @@ own = { } -- not local, might change
own.libs = { -- order can be made better
+ 'l-lua.lua',
+ 'l-lpeg.lua',
'l-function.lua',
'l-string.lua',
'l-table.lua',
- 'l-lpeg.lua',
'l-io.lua',
'l-number.lua',
'l-set.lua',
@@ -17656,6 +18427,10 @@ local helpinfo = [[
--variables show configuration variables
--configurations show configuration order
+--directives show (known) directives
+--trackers show (known) trackers
+--experiments show (known) experiments
+
--expand-braces expand complex variable
--expand-path expand variable (resolve paths)
--expand-var expand variable (resolve references)
@@ -18346,8 +19121,18 @@ else
end
+if e_argument("script") or e_argument("scripts") then
+
+ -- run a script by loading it (using libs), pass args
-if e_argument("selfmerge") then
+ runners.loadbase()
+ if is_mkii_stub then
+ ok = runners.execute_script(filename,false,true)
+ else
+ ok = runners.execute_ctx_script(filename)
+ end
+
+elseif e_argument("selfmerge") then
-- embed used libraries
@@ -18377,17 +19162,6 @@ elseif e_argument("ctxlua") or e_argument("internal") then
runners.loadbase()
ok = runners.execute_script(filename,true)
-elseif e_argument("script") or e_argument("scripts") then
-
- -- run a script by loading it (using libs), pass args
-
- runners.loadbase()
- if is_mkii_stub then
- ok = runners.execute_script(filename,false,true)
- else
- ok = runners.execute_ctx_script(filename)
- end
-
elseif e_argument("execute") then
-- execute script
@@ -18615,6 +19389,18 @@ elseif e_argument("version") then
application.version()
+elseif e_argument("directives") then
+
+ directives.show()
+
+elseif e_argument("trackers") then
+
+ trackers.show()
+
+elseif e_argument("experiments") then
+
+ experiments.show()
+
elseif e_argument("help") or filename=='help' or filename == "" then
application.help()
diff --git a/scripts/context/stubs/unix/mtxrun b/scripts/context/stubs/unix/mtxrun
index 01c601eb5..3a02ad582 100755
--- a/scripts/context/stubs/unix/mtxrun
+++ b/scripts/context/stubs/unix/mtxrun
@@ -1,5 +1,16 @@
#!/usr/bin/env texlua
+-- for k, v in next, _G.string do
+-- local tv = type(v)
+-- if tv == "table" then
+-- for kk, vv in next, v do
+-- print(k,kk,vv)
+-- end
+-- else
+-- print(tv,k,v)
+-- end
+-- end
+
if not modules then modules = { } end modules ['mtxrun'] = {
version = 1.001,
comment = "runner, lua replacement for texmfstart.rb",
@@ -43,7 +54,7 @@ if not modules then modules = { } end modules ['mtxrun'] = {
do -- create closure to overcome 200 locals limit
-if not modules then modules = { } end modules ['l-functions'] = {
+if not modules then modules = { } end modules ['l-lua'] = {
version = 1.001,
comment = "companion to luat-lib.mkiv",
author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
@@ -51,16 +62,112 @@ if not modules then modules = { } end modules ['l-functions'] = {
license = "see context related readme files"
}
-functions = functions or { }
+-- compatibility hacks ... try to avoid usage
-function functions.dummy() end
+local major, minor = string.match(_VERSION,"^[^%d]+(%d+)%.(%d+).*$")
+
+_MAJORVERSION = tonumber(major) or 5
+_MINORVERSION = tonumber(minor) or 1
+
+-- basics:
+
+if loadstring then
+
+ local loadnormal = load
+
+ function load(first,...)
+ if type(first) == "string" then
+ return loadstring(first,...)
+ else
+ return loadnormal(first,...)
+ end
+ end
+
+else
+
+ loadstring = load
+
+end
+
+-- table:
+
+-- Starting with version 5.2 Lua no longer provide ipairs, which makes
+-- sense. As we already used the for loop and # in most places the
+-- impact on ConTeXt was not that large; the remaining ipairs already
+-- have been replaced. In a similar fashion we also hardly used pairs.
+--
+-- Hm, actually ipairs was retained, but we no longer use it anyway.
+--
+-- Just in case, we provide the fallbacks as discussed in Programming
+-- in Lua (http://www.lua.org/pil/7.3.html):
+
+if not ipairs then
+
+ -- for k, v in ipairs(t) do ... end
+ -- for k=1,#t do local v = t[k] ... end
+
+ local function iterate(a,i)
+ i = i + 1
+ local v = a[i]
+ if v ~= nil then
+ return i, v --, nil
+ end
+ end
+
+ function ipairs(a)
+ return iterate, a, 0
+ end
+
+end
+
+if not pairs then
+
+ -- for k, v in pairs(t) do ... end
+ -- for k, v in next, t do ... end
+
+ function pairs(t)
+ return next, t -- , nil
+ end
+
+end
+
+-- The unpack function has been moved to the table table, and for compatiility
+-- reasons we provide both now.
+
+if not table.unpack then
+
+ table.unpack = _G.unpack
+
+elseif not unpack then
+
+ _G.unpack = table.unpack
+
+end
+
+-- package:
+
+-- if not package.seachers then
+--
+-- package.searchers = package.loaders -- 5.2
+--
+-- elseif not package.loaders then
+--
+-- package.loaders = package.searchers
+--
+-- end
+
+if not package.loaders then -- brr, searchers is a special "loadlib function" userdata type
+
+ package.loaders = package.searchers
+
+end
end -- of closure
do -- create closure to overcome 200 locals limit
-if not modules then modules = { } end modules ['l-string'] = {
+if not modules then modules = { } end modules ['l-lpeg'] = {
version = 1.001,
comment = "companion to luat-lib.mkiv",
author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
@@ -68,34 +175,843 @@ if not modules then modules = { } end modules ['l-string'] = {
license = "see context related readme files"
}
-local string = string
-local sub, gsub, find, match, gmatch, format, char, byte, rep, lower = string.sub, string.gsub, string.find, string.match, string.gmatch, string.format, string.char, string.byte, string.rep, string.lower
-local lpegmatch, S, C, Ct = lpeg.match, lpeg.S, lpeg.C, lpeg.Ct
+-- a new lpeg fails on a #(1-P(":")) test and really needs a + P(-1)
--- some functions may disappear as they are not used anywhere
+-- move utf -> l-unicode
+-- move string -> l-string or keep it here
-if not string.split then
+local lpeg = require("lpeg")
- -- this will be overloaded by a faster lpeg variant
+-- tracing (only used when we encounter a problem in integration of lpeg in luatex)
- function string.split(str,pattern)
- local t = { }
- if #str > 0 then
- local n = 1
- for s in gmatch(str..pattern,"(.-)"..pattern) do
- t[n] = s
- n = n + 1
+-- some code will move to unicode and string
+
+local report = texio and texio.write_nl or print
+
+-- local lpmatch = lpeg.match
+-- local lpprint = lpeg.print
+-- local lpp = lpeg.P
+-- local lpr = lpeg.R
+-- local lps = lpeg.S
+-- local lpc = lpeg.C
+-- local lpb = lpeg.B
+-- local lpv = lpeg.V
+-- local lpcf = lpeg.Cf
+-- local lpcb = lpeg.Cb
+-- local lpcg = lpeg.Cg
+-- local lpct = lpeg.Ct
+-- local lpcs = lpeg.Cs
+-- local lpcc = lpeg.Cc
+-- local lpcmt = lpeg.Cmt
+-- local lpcarg = lpeg.Carg
+
+-- function lpeg.match(l,...) report("LPEG MATCH") lpprint(l) return lpmatch(l,...) end
+
+-- function lpeg.P (l) local p = lpp (l) report("LPEG P =") lpprint(l) return p end
+-- function lpeg.R (l) local p = lpr (l) report("LPEG R =") lpprint(l) return p end
+-- function lpeg.S (l) local p = lps (l) report("LPEG S =") lpprint(l) return p end
+-- function lpeg.C (l) local p = lpc (l) report("LPEG C =") lpprint(l) return p end
+-- function lpeg.B (l) local p = lpb (l) report("LPEG B =") lpprint(l) return p end
+-- function lpeg.V (l) local p = lpv (l) report("LPEG V =") lpprint(l) return p end
+-- function lpeg.Cf (l) local p = lpcf (l) report("LPEG Cf =") lpprint(l) return p end
+-- function lpeg.Cb (l) local p = lpcb (l) report("LPEG Cb =") lpprint(l) return p end
+-- function lpeg.Cg (l) local p = lpcg (l) report("LPEG Cg =") lpprint(l) return p end
+-- function lpeg.Ct (l) local p = lpct (l) report("LPEG Ct =") lpprint(l) return p end
+-- function lpeg.Cs (l) local p = lpcs (l) report("LPEG Cs =") lpprint(l) return p end
+-- function lpeg.Cc (l) local p = lpcc (l) report("LPEG Cc =") lpprint(l) return p end
+-- function lpeg.Cmt (l) local p = lpcmt (l) report("LPEG Cmt =") lpprint(l) return p end
+-- function lpeg.Carg (l) local p = lpcarg(l) report("LPEG Carg =") lpprint(l) return p end
+
+local type, next = type, next
+local byte, char, gmatch, format = string.byte, string.char, string.gmatch, string.format
+
+-- Beware, we predefine a bunch of patterns here and one reason for doing so
+-- is that we get consistent behaviour in some of the visualizers.
+
+lpeg.patterns = lpeg.patterns or { } -- so that we can share
+local patterns = lpeg.patterns
+
+local P, R, S, V, Ct, C, Cs, Cc, Cp, Cmt = lpeg.P, lpeg.R, lpeg.S, lpeg.V, lpeg.Ct, lpeg.C, lpeg.Cs, lpeg.Cc, lpeg.Cp, lpeg.Cmt
+local lpegtype, lpegmatch = lpeg.type, lpeg.match
+
+local anything = P(1)
+local endofstring = P(-1)
+local alwaysmatched = P(true)
+
+patterns.anything = anything
+patterns.endofstring = endofstring
+patterns.beginofstring = alwaysmatched
+patterns.alwaysmatched = alwaysmatched
+
+local digit, sign = R('09'), S('+-')
+local cr, lf, crlf = P("\r"), P("\n"), P("\r\n")
+local newline = crlf + S("\r\n") -- cr + lf
+local escaped = P("\\") * anything
+local squote = P("'")
+local dquote = P('"')
+local space = P(" ")
+
+local utfbom_32_be = P('\000\000\254\255')
+local utfbom_32_le = P('\255\254\000\000')
+local utfbom_16_be = P('\255\254')
+local utfbom_16_le = P('\254\255')
+local utfbom_8 = P('\239\187\191')
+local utfbom = utfbom_32_be + utfbom_32_le
+ + utfbom_16_be + utfbom_16_le
+ + utfbom_8
+local utftype = utfbom_32_be * Cc("utf-32-be") + utfbom_32_le * Cc("utf-32-le")
+ + utfbom_16_be * Cc("utf-16-be") + utfbom_16_le * Cc("utf-16-le")
+ + utfbom_8 * Cc("utf-8") + alwaysmatched * Cc("utf-8") -- assume utf8
+local utfoffset = utfbom_32_be * Cc(4) + utfbom_32_le * Cc(4)
+ + utfbom_16_be * Cc(2) + utfbom_16_le * Cc(2)
+ + utfbom_8 * Cc(3) + Cc(0)
+
+local utf8next = R("\128\191")
+
+patterns.utf8one = R("\000\127")
+patterns.utf8two = R("\194\223") * utf8next
+patterns.utf8three = R("\224\239") * utf8next * utf8next
+patterns.utf8four = R("\240\244") * utf8next * utf8next * utf8next
+patterns.utfbom = utfbom
+patterns.utftype = utftype
+patterns.utfoffset = utfoffset
+
+local utf8char = patterns.utf8one + patterns.utf8two + patterns.utf8three + patterns.utf8four
+local validutf8char = utf8char^0 * endofstring * Cc(true) + Cc(false)
+
+patterns.utf8 = utf8char
+patterns.utf8char = utf8char
+patterns.validutf8 = validutf8char
+patterns.validutf8char = validutf8char
+
+local eol = S("\n\r")
+local spacer = S(" \t\f\v") -- + char(0xc2, 0xa0) if we want utf (cf mail roberto)
+local whitespace = eol + spacer
+local nonspacer = 1 - spacer
+local nonwhitespace = 1 - whitespace
+
+patterns.eol = eol
+patterns.spacer = spacer
+patterns.whitespace = whitespace
+patterns.nonspacer = nonspacer
+patterns.nonwhitespace = nonwhitespace
+
+local stripper = spacer^0 * C((spacer^0 * nonspacer^1)^0) -- from example by roberto
+
+----- collapser = Cs(spacer^0/"" * ((spacer^1 * P(-1) / "") + (spacer^1/" ") + P(1))^0)
+local collapser = Cs(spacer^0/"" * nonspacer^0 * ((spacer^0/" " * nonspacer^1)^0))
+
+patterns.stripper = stripper
+patterns.collapser = collapser
+
+patterns.digit = digit
+patterns.sign = sign
+patterns.cardinal = sign^0 * digit^1
+patterns.integer = sign^0 * digit^1
+patterns.unsigned = digit^0 * P('.') * digit^1
+patterns.float = sign^0 * patterns.unsigned
+patterns.cunsigned = digit^0 * P(',') * digit^1
+patterns.cfloat = sign^0 * patterns.cunsigned
+patterns.number = patterns.float + patterns.integer
+patterns.cnumber = patterns.cfloat + patterns.integer
+patterns.oct = P("0") * R("07")^1
+patterns.octal = patterns.oct
+patterns.HEX = P("0x") * R("09","AF")^1
+patterns.hex = P("0x") * R("09","af")^1
+patterns.hexadecimal = P("0x") * R("09","AF","af")^1
+patterns.lowercase = R("az")
+patterns.uppercase = R("AZ")
+patterns.letter = patterns.lowercase + patterns.uppercase
+patterns.space = space
+patterns.tab = P("\t")
+patterns.spaceortab = patterns.space + patterns.tab
+patterns.newline = newline
+patterns.emptyline = newline^1
+patterns.equal = P("=")
+patterns.comma = P(",")
+patterns.commaspacer = P(",") * spacer^0
+patterns.period = P(".")
+patterns.colon = P(":")
+patterns.semicolon = P(";")
+patterns.underscore = P("_")
+patterns.escaped = escaped
+patterns.squote = squote
+patterns.dquote = dquote
+patterns.nosquote = (escaped + (1-squote))^0
+patterns.nodquote = (escaped + (1-dquote))^0
+patterns.unsingle = (squote/"") * patterns.nosquote * (squote/"") -- will change to C in the middle
+patterns.undouble = (dquote/"") * patterns.nodquote * (dquote/"") -- will change to C in the middle
+patterns.unquoted = patterns.undouble + patterns.unsingle -- more often undouble
+patterns.unspacer = ((patterns.spacer^1)/"")^0
+
+patterns.singlequoted = squote * patterns.nosquote * squote
+patterns.doublequoted = dquote * patterns.nodquote * dquote
+patterns.quoted = patterns.doublequoted + patterns.singlequoted
+
+patterns.propername = R("AZ","az","__") * R("09","AZ","az", "__")^0 * P(-1)
+
+patterns.somecontent = (anything - newline - space)^1 -- (utf8char - newline - space)^1
+patterns.beginline = #(1-newline)
+
+local function anywhere(pattern) --slightly adapted from website
+ return P { P(pattern) + 1 * V(1) }
+end
+
+lpeg.anywhere = anywhere
+
+function lpeg.instringchecker(p)
+ p = anywhere(p)
+ return function(str)
+ return lpegmatch(p,str) and true or false
+ end
+end
+
+function lpeg.splitter(pattern, action)
+ return (((1-P(pattern))^1)/action+1)^0
+end
+
+function lpeg.tsplitter(pattern, action)
+ return Ct((((1-P(pattern))^1)/action+1)^0)
+end
+
+-- probleem: separator can be lpeg and that does not hash too well, but
+-- it's quite okay as the key is then not garbage collected
+
+local splitters_s, splitters_m, splitters_t = { }, { }, { }
+
+local function splitat(separator,single)
+ local splitter = (single and splitters_s[separator]) or splitters_m[separator]
+ if not splitter then
+ separator = P(separator)
+ local other = C((1 - separator)^0)
+ if single then
+ local any = anything
+ splitter = other * (separator * C(any^0) + "") -- ?
+ splitters_s[separator] = splitter
+ else
+ splitter = other * (separator * other)^0
+ splitters_m[separator] = splitter
+ end
+ end
+ return splitter
+end
+
+local function tsplitat(separator)
+ local splitter = splitters_t[separator]
+ if not splitter then
+ splitter = Ct(splitat(separator))
+ splitters_t[separator] = splitter
+ end
+ return splitter
+end
+
+lpeg.splitat = splitat
+lpeg.tsplitat = tsplitat
+
+function string.splitup(str,separator)
+ if not separator then
+ separator = ","
+ end
+ return lpegmatch(splitters_m[separator] or splitat(separator),str)
+end
+
+-- local p = splitat("->",false) print(lpegmatch(p,"oeps->what->more")) -- oeps what more
+-- local p = splitat("->",true) print(lpegmatch(p,"oeps->what->more")) -- oeps what->more
+-- local p = splitat("->",false) print(lpegmatch(p,"oeps")) -- oeps
+-- local p = splitat("->",true) print(lpegmatch(p,"oeps")) -- oeps
+
+local cache = { }
+
+function lpeg.split(separator,str)
+ local c = cache[separator]
+ if not c then
+ c = tsplitat(separator)
+ cache[separator] = c
+ end
+ return lpegmatch(c,str)
+end
+
+function string.split(str,separator)
+ if separator then
+ local c = cache[separator]
+ if not c then
+ c = tsplitat(separator)
+ cache[separator] = c
+ end
+ return lpegmatch(c,str)
+ else
+ return { str }
+ end
+end
+
+local spacing = patterns.spacer^0 * newline -- sort of strip
+local empty = spacing * Cc("")
+local nonempty = Cs((1-spacing)^1) * spacing^-1
+local content = (empty + nonempty)^1
+
+patterns.textline = content
+
+local linesplitter = tsplitat(newline)
+
+patterns.linesplitter = linesplitter
+
+function string.splitlines(str)
+ return lpegmatch(linesplitter,str)
+end
+
+-- lpeg.splitters = cache -- no longer public
+
+local cache = { }
+
+function lpeg.checkedsplit(separator,str)
+ local c = cache[separator]
+ if not c then
+ separator = P(separator)
+ local other = C((1 - separator)^1)
+ c = Ct(separator^0 * other * (separator^1 * other)^0)
+ cache[separator] = c
+ end
+ return lpegmatch(c,str)
+end
+
+function string.checkedsplit(str,separator)
+ local c = cache[separator]
+ if not c then
+ separator = P(separator)
+ local other = C((1 - separator)^1)
+ c = Ct(separator^0 * other * (separator^1 * other)^0)
+ cache[separator] = c
+ end
+ return lpegmatch(c,str)
+end
+
+-- from roberto's site:
+
+local function f2(s) local c1, c2 = byte(s,1,2) return c1 * 64 + c2 - 12416 end
+local function f3(s) local c1, c2, c3 = byte(s,1,3) return (c1 * 64 + c2) * 64 + c3 - 925824 end
+local function f4(s) local c1, c2, c3, c4 = byte(s,1,4) return ((c1 * 64 + c2) * 64 + c3) * 64 + c4 - 63447168 end
+
+local utf8byte = patterns.utf8one/byte + patterns.utf8two/f2 + patterns.utf8three/f3 + patterns.utf8four/f4
+
+patterns.utf8byte = utf8byte
+
+
+
+local cache = { }
+
+function lpeg.stripper(str)
+ if type(str) == "string" then
+ local s = cache[str]
+ if not s then
+ s = Cs(((S(str)^1)/"" + 1)^0)
+ cache[str] = s
+ end
+ return s
+ else
+ return Cs(((str^1)/"" + 1)^0)
+ end
+end
+
+local cache = { }
+
+function lpeg.keeper(str)
+ if type(str) == "string" then
+ local s = cache[str]
+ if not s then
+ s = Cs((((1-S(str))^1)/"" + 1)^0)
+ cache[str] = s
+ end
+ return s
+ else
+ return Cs((((1-str)^1)/"" + 1)^0)
+ end
+end
+
+function lpeg.frontstripper(str) -- or pattern (yet undocumented)
+ return (P(str) + P(true)) * Cs(anything^0)
+end
+
+function lpeg.endstripper(str) -- or pattern (yet undocumented)
+ return Cs((1 - P(str) * endofstring)^0)
+end
+
+-- Just for fun I looked at the used bytecode and
+-- p = (p and p + pp) or pp gets one more (testset).
+
+-- todo: cache when string
+
+function lpeg.replacer(one,two,makefunction,isutf) -- in principle we should sort the keys
+ local pattern
+ local u = isutf and utf8char or 1
+ if type(one) == "table" then
+ local no = #one
+ local p = P(false)
+ if no == 0 then
+ for k, v in next, one do
+ p = p + P(k) / v
+ end
+ pattern = Cs((p + u)^0)
+ elseif no == 1 then
+ local o = one[1]
+ one, two = P(o[1]), o[2]
+ -- pattern = Cs(((1-one)^1 + one/two)^0)
+ pattern = Cs((one/two + u)^0)
+ else
+ for i=1,no do
+ local o = one[i]
+ p = p + P(o[1]) / o[2]
+ end
+ pattern = Cs((p + u)^0)
+ end
+ else
+ pattern = Cs((P(one)/(two or "") + u)^0)
+ end
+ if makefunction then
+ return function(str)
+ return lpegmatch(pattern,str)
+ end
+ else
+ return pattern
+ end
+end
+
+function lpeg.finder(lst,makefunction)
+ local pattern
+ if type(lst) == "table" then
+ pattern = P(false)
+ if #lst == 0 then
+ for k, v in next, lst do
+ pattern = pattern + P(k) -- ignore key, so we can use a replacer table
+ end
+ else
+ for i=1,#lst do
+ pattern = pattern + P(lst[i])
+ end
+ end
+ else
+ pattern = P(lst)
+ end
+ pattern = (1-pattern)^0 * pattern
+ if makefunction then
+ return function(str)
+ return lpegmatch(pattern,str)
+ end
+ else
+ return pattern
+ end
+end
+
+-- print(lpeg.match(lpeg.replacer("e","a"),"test test"))
+-- print(lpeg.match(lpeg.replacer{{"e","a"}},"test test"))
+-- print(lpeg.match(lpeg.replacer({ e = "a", t = "x" }),"test test"))
+
+local splitters_f, splitters_s = { }, { }
+
+function lpeg.firstofsplit(separator) -- always return value
+ local splitter = splitters_f[separator]
+ if not splitter then
+ separator = P(separator)
+ splitter = C((1 - separator)^0)
+ splitters_f[separator] = splitter
+ end
+ return splitter
+end
+
+function lpeg.secondofsplit(separator) -- nil if not split
+ local splitter = splitters_s[separator]
+ if not splitter then
+ separator = P(separator)
+ splitter = (1 - separator)^0 * separator * C(anything^0)
+ splitters_s[separator] = splitter
+ end
+ return splitter
+end
+
+function lpeg.balancer(left,right)
+ left, right = P(left), P(right)
+ return P { left * ((1 - left - right) + V(1))^0 * right }
+end
+
+-- print(1,lpegmatch(lpeg.firstofsplit(":"),"bc:de"))
+-- print(2,lpegmatch(lpeg.firstofsplit(":"),":de")) -- empty
+-- print(3,lpegmatch(lpeg.firstofsplit(":"),"bc"))
+-- print(4,lpegmatch(lpeg.secondofsplit(":"),"bc:de"))
+-- print(5,lpegmatch(lpeg.secondofsplit(":"),"bc:")) -- empty
+-- print(6,lpegmatch(lpeg.secondofsplit(":",""),"bc"))
+-- print(7,lpegmatch(lpeg.secondofsplit(":"),"bc"))
+-- print(9,lpegmatch(lpeg.secondofsplit(":","123"),"bc"))
+
+-- -- slower:
+--
+-- function lpeg.counter(pattern)
+-- local n, pattern = 0, (lpeg.P(pattern)/function() n = n + 1 end + lpeg.anything)^0
+-- return function(str) n = 0 ; lpegmatch(pattern,str) ; return n end
+-- end
+
+local nany = utf8char/""
+
+function lpeg.counter(pattern)
+ pattern = Cs((P(pattern)/" " + nany)^0)
+ return function(str)
+ return #lpegmatch(pattern,str)
+ end
+end
+
+-- utf extensies
+
+local utfcharacters = utf and utf.characters or string.utfcharacters
+local utfgmatch = unicode and unicode.utf8.gmatch
+local utfchar = utf and utf.char or (unicode and unicode.utf8 and unicode.utf8.char)
+
+lpeg.UP = lpeg.P
+
+if utfcharacters then
+
+ function lpeg.US(str)
+ local p = P(false)
+ for uc in utfcharacters(str) do
+ p = p + P(uc)
+ end
+ return p
+ end
+
+
+elseif utfgmatch then
+
+ function lpeg.US(str)
+ local p = P(false)
+ for uc in utfgmatch(str,".") do
+ p = p + P(uc)
+ end
+ return p
+ end
+
+else
+
+ function lpeg.US(str)
+ local p = P(false)
+ local f = function(uc)
+ p = p + P(uc)
+ end
+ lpegmatch((utf8char/f)^0,str)
+ return p
+ end
+
+end
+
+local range = utf8byte * utf8byte + Cc(false) -- utf8byte is already a capture
+
+function lpeg.UR(str,more)
+ local first, last
+ if type(str) == "number" then
+ first = str
+ last = more or first
+ else
+ first, last = lpegmatch(range,str)
+ if not last then
+ return P(str)
+ end
+ end
+ if first == last then
+ return P(str)
+ elseif utfchar and (last - first < 8) then -- a somewhat arbitrary criterium
+ local p = P(false)
+ for i=first,last do
+ p = p + P(utfchar(i))
+ end
+ return p -- nil when invalid range
+ else
+ local f = function(b)
+ return b >= first and b <= last
+ end
+ -- tricky, these nested captures
+ return utf8byte / f -- nil when invalid range
+ end
+end
+
+-- print(lpeg.match(lpeg.Cs((C(lpeg.UR("αω"))/{ ["χ"] = "OEPS" })^0),"αωχαω"))
+
+-- lpeg.print(lpeg.R("ab","cd","gh"))
+-- lpeg.print(lpeg.P("a","b","c"))
+-- lpeg.print(lpeg.S("a","b","c"))
+
+-- print(lpeg.count("äáàa",lpeg.P("á") + lpeg.P("à")))
+-- print(lpeg.count("äáàa",lpeg.UP("áà")))
+-- print(lpeg.count("äáàa",lpeg.US("àá")))
+-- print(lpeg.count("äáàa",lpeg.UR("aá")))
+-- print(lpeg.count("äáàa",lpeg.UR("àá")))
+-- print(lpeg.count("äáàa",lpeg.UR(0x0000,0xFFFF)))
+
+function lpeg.is_lpeg(p)
+ return p and lpegtype(p) == "pattern"
+end
+
+function lpeg.oneof(list,...) -- lpeg.oneof("elseif","else","if","then") -- assume proper order
+ if type(list) ~= "table" then
+ list = { list, ... }
+ end
+ -- table.sort(list) -- longest match first
+ local p = P(list[1])
+ for l=2,#list do
+ p = p + P(list[l])
+ end
+ return p
+end
+
+-- For the moment here, but it might move to utilities. Beware, we need to
+-- have the longest keyword first, so 'aaa' comes beforte 'aa' which is why we
+-- loop back from the end cq. prepend.
+
+local sort = table.sort
+
+local function copyindexed(old)
+ local new = { }
+ for i=1,#old do
+ new[i] = old
+ end
+ return new
+end
+
+local function sortedkeys(tab)
+ local keys, s = { }, 0
+ for key,_ in next, tab do
+ s = s + 1
+ keys[s] = key
+ end
+ sort(keys)
+ return keys
+end
+
+function lpeg.append(list,pp,delayed,checked)
+ local p = pp
+ if #list > 0 then
+ local keys = copyindexed(list)
+ sort(keys)
+ for i=#keys,1,-1 do
+ local k = keys[i]
+ if p then
+ p = P(k) + p
+ else
+ p = P(k)
+ end
+ end
+ elseif delayed then -- hm, it looks like the lpeg parser resolves anyway
+ local keys = sortedkeys(list)
+ if p then
+ for i=1,#keys,1 do
+ local k = keys[i]
+ local v = list[k]
+ p = P(k)/list + p
+ end
+ else
+ for i=1,#keys do
+ local k = keys[i]
+ local v = list[k]
+ if p then
+ p = P(k) + p
+ else
+ p = P(k)
+ end
+ end
+ if p then
+ p = p / list
+ end
+ end
+ elseif checked then
+ -- problem: substitution gives a capture
+ local keys = sortedkeys(list)
+ for i=1,#keys do
+ local k = keys[i]
+ local v = list[k]
+ if p then
+ if k == v then
+ p = P(k) + p
+ else
+ p = P(k)/v + p
+ end
+ else
+ if k == v then
+ p = P(k)
+ else
+ p = P(k)/v
+ end
+ end
+ end
+ else
+ local keys = sortedkeys(list)
+ for i=1,#keys do
+ local k = keys[i]
+ local v = list[k]
+ if p then
+ p = P(k)/v + p
+ else
+ p = P(k)/v
+ end
+ end
+ end
+ return p
+end
+
+-- inspect(lpeg.append({ a = "1", aa = "1", aaa = "1" } ,nil,true))
+-- inspect(lpeg.append({ ["degree celsius"] = "1", celsius = "1", degree = "1" } ,nil,true))
+
+-- function lpeg.exact_match(words,case_insensitive)
+-- local pattern = concat(words)
+-- if case_insensitive then
+-- local pattern = S(upper(characters)) + S(lower(characters))
+-- local list = { }
+-- for i=1,#words do
+-- list[lower(words[i])] = true
+-- end
+-- return Cmt(pattern^1, function(_,i,s)
+-- return list[lower(s)] and i
+-- end)
+-- else
+-- local pattern = S(concat(words))
+-- local list = { }
+-- for i=1,#words do
+-- list[words[i]] = true
+-- end
+-- return Cmt(pattern^1, function(_,i,s)
+-- return list[s] and i
+-- end)
+-- end
+-- end
+
+-- experiment:
+
+local function make(t)
+ local p
+ local keys = sortedkeys(t)
+ for i=1,#keys do
+ local k = keys[i]
+ local v = t[k]
+ if not p then
+ if next(v) then
+ p = P(k) * make(v)
+ else
+ p = P(k)
+ end
+ else
+ if next(v) then
+ p = p + P(k) * make(v)
+ else
+ p = p + P(k)
end
end
- return t
end
+ return p
+end
+function lpeg.utfchartabletopattern(list) -- goes to util-lpg
+ local tree = { }
+ for i=1,#list do
+ local t = tree
+ for c in gmatch(list[i],".") do
+ if not t[c] then
+ t[c] = { }
+ end
+ t = t[c]
+ end
+ end
+ return make(tree)
end
+-- inspect ( lpeg.utfchartabletopattern {
+-- utfchar(0x00A0), -- nbsp
+-- utfchar(0x2000), -- enquad
+-- utfchar(0x2001), -- emquad
+-- utfchar(0x2002), -- enspace
+-- utfchar(0x2003), -- emspace
+-- utfchar(0x2004), -- threeperemspace
+-- utfchar(0x2005), -- fourperemspace
+-- utfchar(0x2006), -- sixperemspace
+-- utfchar(0x2007), -- figurespace
+-- utfchar(0x2008), -- punctuationspace
+-- utfchar(0x2009), -- breakablethinspace
+-- utfchar(0x200A), -- hairspace
+-- utfchar(0x200B), -- zerowidthspace
+-- utfchar(0x202F), -- narrownobreakspace
+-- utfchar(0x205F), -- math thinspace
+-- } )
+
+-- a few handy ones:
+--
+-- faster than find(str,"[\n\r]") when match and # > 7 and always faster when # > 3
+
+patterns.containseol = lpeg.finder(eol) -- (1-eol)^0 * eol
+
+
+end -- of closure
+
+do -- create closure to overcome 200 locals limit
+
+if not modules then modules = { } end modules ['l-functions'] = {
+ version = 1.001,
+ comment = "companion to luat-lib.mkiv",
+ author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
+ copyright = "PRAGMA ADE / ConTeXt Development Team",
+ license = "see context related readme files"
+}
+
+functions = functions or { }
+
+function functions.dummy() end
+
+
+end -- of closure
+
+do -- create closure to overcome 200 locals limit
+
+if not modules then modules = { } end modules ['l-string'] = {
+ version = 1.001,
+ comment = "companion to luat-lib.mkiv",
+ author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
+ copyright = "PRAGMA ADE / ConTeXt Development Team",
+ license = "see context related readme files"
+}
+
+local string = string
+local sub, gmatch, format, char, byte, rep, lower = string.sub, string.gmatch, string.format, string.char, string.byte, string.rep, string.lower
+local lpegmatch, patterns = lpeg.match, lpeg.patterns
+local P, S, C, Ct, Cc, Cs = lpeg.P, lpeg.S, lpeg.C, lpeg.Ct, lpeg.Cc, lpeg.Cs
+
+-- Some functions are already defined in l-lpeg and maybe some from here will
+-- move there (unless we also expose caches).
+
+-- if not string.split then
+--
+-- function string.split(str,pattern)
+-- local t = { }
+-- if #str > 0 then
+-- local n = 1
+-- for s in gmatch(str..pattern,"(.-)"..pattern) do
+-- t[n] = s
+-- n = n + 1
+-- end
+-- end
+-- return t
+-- end
+--
+-- end
+
+-- function string.unquoted(str)
+-- return (gsub(str,"^([\"\'])(.*)%1$","%2")) -- interesting pattern
+-- end
+
+local unquoted = patterns.squote * C(patterns.nosquote) * patterns.squote
+ + patterns.dquote * C(patterns.nodquote) * patterns.dquote
+
function string.unquoted(str)
- return (gsub(str,"^([\"\'])(.*)%1$","%2"))
+ return lpegmatch(unquoted,str) or str
end
+-- print(string.unquoted("test"))
+-- print(string.unquoted([["t\"est"]]))
+-- print(string.unquoted([["t\"est"x]]))
+-- print(string.unquoted("\'test\'"))
+-- print(string.unquoted('"test"'))
+-- print(string.unquoted('"test"'))
function string.quoted(str)
return format("%q",str) -- always "
@@ -118,65 +1034,112 @@ function string.limit(str,n,sentinel) -- not utf proof
end
end
-local space = S(" \t\v\n")
-local nospace = 1 - space
-local stripper = space^0 * C((space^0 * nospace^1)^0) -- roberto's code
+local stripper = patterns.stripper
+local collapser = patterns.collapser
function string.strip(str)
return lpegmatch(stripper,str) or ""
end
+function string.collapsespaces(str)
+ return lpegmatch(collapser,str) or ""
+end
+
+-- function string.is_empty(str)
+-- return not find(str,"%S")
+-- end
+
+local pattern = P(" ")^0 * P(-1)
+
function string.is_empty(str)
- return not find(str,"%S")
+ if str == "" then
+ return true
+ else
+ return lpegmatch(pattern,str) and true or false
+ end
end
-local patterns_escapes = {
- ["%"] = "%%",
- ["."] = "%.",
- ["+"] = "%+", ["-"] = "%-", ["*"] = "%*",
- ["["] = "%[", ["]"] = "%]",
- ["("] = "%(", [")"] = "%)",
- -- ["{"] = "%{", ["}"] = "%}"
- -- ["^"] = "%^", ["$"] = "%$",
-}
-local simple_escapes = {
- ["-"] = "%-",
- ["."] = "%.",
- ["?"] = ".",
- ["*"] = ".*",
-}
+-- if not string.escapedpattern then
+--
+-- local patterns_escapes = {
+-- ["%"] = "%%",
+-- ["."] = "%.",
+-- ["+"] = "%+", ["-"] = "%-", ["*"] = "%*",
+-- ["["] = "%[", ["]"] = "%]",
+-- ["("] = "%(", [")"] = "%)",
+-- -- ["{"] = "%{", ["}"] = "%}"
+-- -- ["^"] = "%^", ["$"] = "%$",
+-- }
+--
+-- local simple_escapes = {
+-- ["-"] = "%-",
+-- ["."] = "%.",
+-- ["?"] = ".",
+-- ["*"] = ".*",
+-- }
+--
+-- function string.escapedpattern(str,simple)
+-- return (gsub(str,".",simple and simple_escapes or patterns_escapes))
+-- end
+--
+-- function string.topattern(str,lowercase,strict)
+-- if str == "" then
+-- return ".*"
+-- else
+-- str = gsub(str,".",simple_escapes)
+-- if lowercase then
+-- str = lower(str)
+-- end
+-- if strict then
+-- return "^" .. str .. "$"
+-- else
+-- return str
+-- end
+-- end
+-- end
+--
+-- end
+
+--- needs checking
+
+local anything = patterns.anything
+local allescapes = Cc("%") * S(".-+%?()[]*") -- also {} and ^$ ?
+local someescapes = Cc("%") * S(".-+%()[]") -- also {} and ^$ ?
+local matchescapes = Cc(".") * S("*?") -- wildcard and single match
+
+local pattern_a = Cs ( ( allescapes + anything )^0 )
+local pattern_b = Cs ( ( someescapes + matchescapes + anything )^0 )
+local pattern_c = Cs ( Cc("^") * ( someescapes + matchescapes + anything )^0 * Cc("$") )
function string.escapedpattern(str,simple)
- return (gsub(str,".",simple and simple_escapes or patterns_escapes))
+ return lpegmatch(simple and pattern_b or pattern_a,str)
end
function string.topattern(str,lowercase,strict)
if str == "" then
return ".*"
+ elseif strict then
+ str = lpegmatch(pattern_c,str)
else
- str = gsub(str,".",simple_escapes)
- if lowercase then
- str = lower(str)
- end
- if strict then
- return "^" .. str .. "$"
- else
- return str
- end
+ str = lpegmatch(pattern_b,str)
+ end
+ if lowercase then
+ return lower(str)
+ else
+ return str
end
end
+-- print(string.escapedpattern("12+34*.tex",false))
+-- print(string.escapedpattern("12+34*.tex",true))
+-- print(string.topattern ("12+34*.tex",false,false))
+-- print(string.topattern ("12+34*.tex",false,true))
function string.valid(str,default)
return (type(str) == "string" and str ~= "" and str) or default or nil
end
--- obsolete names:
-
-string.quote = string.quoted
-string.unquote = string.unquoted
-
-- handy fallback
string.itself = function(s) return s end
@@ -189,6 +1152,19 @@ function string.totable(str)
return lpegmatch(pattern,str)
end
+-- handy from within tex:
+
+local replacer = lpeg.replacer("@","%%") -- Watch the escaped % in lpeg!
+
+function string.tformat(fmt,...)
+ return format(lpegmatch(replacer,fmt),...)
+end
+
+-- obsolete names:
+
+string.quote = string.quoted
+string.unquote = string.unquoted
+
end -- of closure
@@ -202,68 +1178,23 @@ if not modules then modules = { } end modules ['l-table'] = {
license = "see context related readme files"
}
-local type, next, tostring, tonumber, ipairs = type, next, tostring, tonumber, ipairs
+local type, next, tostring, tonumber, ipairs, select = type, next, tostring, tonumber, ipairs, select
local table, string = table, string
local concat, sort, insert, remove = table.concat, table.sort, table.insert, table.remove
-local format, find, gsub, lower, dump, match = string.format, string.find, string.gsub, string.lower, string.dump, string.match
+local format, lower, dump = string.format, string.lower, string.dump
local getmetatable, setmetatable = getmetatable, setmetatable
local getinfo = debug.getinfo
-
--- Starting with version 5.2 Lua no longer provide ipairs, which makes
--- sense. As we already used the for loop and # in most places the
--- impact on ConTeXt was not that large; the remaining ipairs already
--- have been replaced. In a similar fashion we also hardly used pairs.
---
--- Hm, actually ipairs was retained, but we no longer use it anyway.
---
--- Just in case, we provide the fallbacks as discussed in Programming
--- in Lua (http://www.lua.org/pil/7.3.html):
-
-if not ipairs then
-
- -- for k, v in ipairs(t) do ... end
- -- for k=1,#t do local v = t[k] ... end
-
- local function iterate(a,i)
- i = i + 1
- local v = a[i]
- if v ~= nil then
- return i, v --, nil
- end
- end
-
- function ipairs(a)
- return iterate, a, 0
- end
-
-end
-
-if not pairs then
-
- -- for k, v in pairs(t) do ... end
- -- for k, v in next, t do ... end
-
- function pairs(t)
- return next, t -- , nil
- end
-
-end
-
--- Also, unpack has been moved to the table table, and for compatiility
--- reasons we provide both now.
-
-if not table.unpack then
- table.unpack = _G.unpack
-elseif not unpack then
- _G.unpack = table.unpack
-end
+local lpegmatch, patterns = lpeg.match, lpeg.patterns
+local floor = math.floor
-- extra functions, some might go (when not used)
+local stripper = patterns.stripper
+
function table.strip(tab)
local lst, l = { }, 0
for i=1,#tab do
- local s = gsub(tab[i],"^%s*(.-)%s*$","%1")
+ local s = lpegmatch(stripper,tab[i]) or ""
if s == "" then
-- skip this one
else
@@ -372,7 +1303,7 @@ local function sortedhash(t)
end
table.sortedhash = sortedhash
-table.sortedpairs = sortedhash
+table.sortedpairs = sortedhash -- obsolete
function table.append(t,list)
local n = #t
@@ -396,31 +1327,63 @@ function table.prepend(t, list)
return t
end
+-- function table.merge(t, ...) -- first one is target
+-- t = t or { }
+-- local lst = { ... }
+-- for i=1,#lst do
+-- for k, v in next, lst[i] do
+-- t[k] = v
+-- end
+-- end
+-- return t
+-- end
+
function table.merge(t, ...) -- first one is target
t = t or { }
- local lst = { ... }
- for i=1,#lst do
- for k, v in next, lst[i] do
+ for i=1,select("#",...) do
+ for k, v in next, (select(i,...)) do
t[k] = v
end
end
return t
end
+-- function table.merged(...)
+-- local tmp, lst = { }, { ... }
+-- for i=1,#lst do
+-- for k, v in next, lst[i] do
+-- tmp[k] = v
+-- end
+-- end
+-- return tmp
+-- end
+
function table.merged(...)
- local tmp, lst = { }, { ... }
- for i=1,#lst do
- for k, v in next, lst[i] do
- tmp[k] = v
+ local t = { }
+ for i=1,select("#",...) do
+ for k, v in next, (select(i,...)) do
+ t[k] = v
end
end
- return tmp
+ return t
end
+-- function table.imerge(t, ...)
+-- local lst, nt = { ... }, #t
+-- for i=1,#lst do
+-- local nst = lst[i]
+-- for j=1,#nst do
+-- nt = nt + 1
+-- t[nt] = nst[j]
+-- end
+-- end
+-- return t
+-- end
+
function table.imerge(t, ...)
- local lst, nt = { ... }, #t
- for i=1,#lst do
- local nst = lst[i]
+ local nt = #t
+ for i=1,select("#",...) do
+ local nst = select(i,...)
for j=1,#nst do
nt = nt + 1
t[nt] = nst[j]
@@ -429,10 +1392,22 @@ function table.imerge(t, ...)
return t
end
+-- function table.imerged(...)
+-- local tmp, ntmp, lst = { }, 0, {...}
+-- for i=1,#lst do
+-- local nst = lst[i]
+-- for j=1,#nst do
+-- ntmp = ntmp + 1
+-- tmp[ntmp] = nst[j]
+-- end
+-- end
+-- return tmp
+-- end
+
function table.imerged(...)
- local tmp, ntmp, lst = { }, 0, {...}
- for i=1,#lst do
- local nst = lst[i]
+ local tmp, ntmp = { }, 0
+ for i=1,select("#",...) do
+ local nst = select(i,...)
for j=1,#nst do
ntmp = ntmp + 1
tmp[ntmp] = nst[j]
@@ -444,7 +1419,7 @@ end
local function fastcopy(old,metatabletoo) -- fast one
if old then
local new = { }
- for k,v in next, old do
+ for k, v in next, old do
if type(v) == "table" then
new[k] = fastcopy(v,metatabletoo) -- was just table.copy
else
@@ -498,7 +1473,7 @@ end
table.fastcopy = fastcopy
table.copy = copy
-function table.derive(parent)
+function table.derive(parent) -- for the moment not public
local child = { }
if parent then
setmetatable(child,{ __index = parent })
@@ -579,6 +1554,13 @@ end
-- problem: there no good number_to_string converter with the best resolution
+-- probably using .. is faster than format
+-- maybe split in a few cases (yes/no hexify)
+
+-- todo: %g faster on numbers than %s
+
+local propername = patterns.propername -- was find(name,"^%a[%w%_]*$")
+
local function dummy() end
local function do_serialize(root,name,depth,level,indexed)
@@ -588,14 +1570,14 @@ local function do_serialize(root,name,depth,level,indexed)
handle(format("%s{",depth))
else
local tn = type(name)
- if tn == "number" then -- or find(k,"^%d+$") then
+ if tn == "number" then
if hexify then
handle(format("%s[0x%04X]={",depth,name))
else
handle(format("%s[%s]={",depth,name))
end
elseif tn == "string" then
- if noquotes and not reserved[name] and find(name,"^%a[%w%_]*$") then
+ if noquotes and not reserved[name] and lpegmatch(propername,name) then
handle(format("%s%s={",depth,name))
else
handle(format("%s[%q]={",depth,name))
@@ -621,7 +1603,6 @@ local function do_serialize(root,name,depth,level,indexed)
if compact then
last = #root
for k=1,last do
--- if not root[k] then
if root[k] == nil then
last = k - 1
break
@@ -667,7 +1648,7 @@ local function do_serialize(root,name,depth,level,indexed)
handle(format("%s %s,",depth,tostring(v)))
elseif t == "function" then
if functions then
- handle(format('%s loadstring(%q),',depth,dump(v)))
+ handle(format('%s load(%q),',depth,dump(v)))
else
handle(format('%s "function",',depth))
end
@@ -679,7 +1660,7 @@ local function do_serialize(root,name,depth,level,indexed)
handle(format("%s __p__=nil,",depth))
end
elseif t == "number" then
- if tk == "number" then -- or find(k,"^%d+$") then
+ if tk == "number" then
if hexify then
handle(format("%s [0x%04X]=0x%04X,",depth,k,v))
else
@@ -691,7 +1672,7 @@ local function do_serialize(root,name,depth,level,indexed)
else
handle(format("%s [%s]=%s,",depth,tostring(k),v)) -- %.99g
end
- elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then
+ elseif noquotes and not reserved[k] and lpegmatch(propername,k) then
if hexify then
handle(format("%s %s=0x%04X,",depth,k,v))
else
@@ -706,7 +1687,7 @@ local function do_serialize(root,name,depth,level,indexed)
end
elseif t == "string" then
if reduce and tonumber(v) then
- if tk == "number" then -- or find(k,"^%d+$") then
+ if tk == "number" then
if hexify then
handle(format("%s [0x%04X]=%s,",depth,k,v))
else
@@ -714,13 +1695,13 @@ local function do_serialize(root,name,depth,level,indexed)
end
elseif tk == "boolean" then
handle(format("%s [%s]=%s,",depth,tostring(k),v))
- elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then
+ elseif noquotes and not reserved[k] and lpegmatch(propername,k) then
handle(format("%s %s=%s,",depth,k,v))
else
handle(format("%s [%q]=%s,",depth,k,v))
end
else
- if tk == "number" then -- or find(k,"^%d+$") then
+ if tk == "number" then
if hexify then
handle(format("%s [0x%04X]=%q,",depth,k,v))
else
@@ -728,7 +1709,7 @@ local function do_serialize(root,name,depth,level,indexed)
end
elseif tk == "boolean" then
handle(format("%s [%s]=%q,",depth,tostring(k),v))
- elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then
+ elseif noquotes and not reserved[k] and lpegmatch(propername,k) then
handle(format("%s %s=%q,",depth,k,v))
else
handle(format("%s [%q]=%q,",depth,k,v))
@@ -736,7 +1717,7 @@ local function do_serialize(root,name,depth,level,indexed)
end
elseif t == "table" then
if not next(v) then
- if tk == "number" then -- or find(k,"^%d+$") then
+ if tk == "number" then
if hexify then
handle(format("%s [0x%04X]={},",depth,k))
else
@@ -744,7 +1725,7 @@ local function do_serialize(root,name,depth,level,indexed)
end
elseif tk == "boolean" then
handle(format("%s [%s]={},",depth,tostring(k)))
- elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then
+ elseif noquotes and not reserved[k] and lpegmatch(propername,k) then
handle(format("%s %s={},",depth,k))
else
handle(format("%s [%q]={},",depth,k))
@@ -752,15 +1733,15 @@ local function do_serialize(root,name,depth,level,indexed)
elseif inline then
local st = simple_table(v)
if st then
- if tk == "number" then -- or find(k,"^%d+$") then
+ if tk == "number" then
if hexify then
handle(format("%s [0x%04X]={ %s },",depth,k,concat(st,", ")))
else
handle(format("%s [%s]={ %s },",depth,k,concat(st,", ")))
end
- elseif tk == "boolean" then -- or find(k,"^%d+$") then
+ elseif tk == "boolean" then
handle(format("%s [%s]={ %s },",depth,tostring(k),concat(st,", ")))
- elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then
+ elseif noquotes and not reserved[k] and lpegmatch(propername,k) then
handle(format("%s %s={ %s },",depth,k,concat(st,", ")))
else
handle(format("%s [%q]={ %s },",depth,k,concat(st,", ")))
@@ -772,15 +1753,15 @@ local function do_serialize(root,name,depth,level,indexed)
do_serialize(v,k,depth,level+1)
end
elseif t == "boolean" then
- if tk == "number" then -- or find(k,"^%d+$") then
+ if tk == "number" then
if hexify then
handle(format("%s [0x%04X]=%s,",depth,k,tostring(v)))
else
handle(format("%s [%s]=%s,",depth,k,tostring(v)))
end
- elseif tk == "boolean" then -- or find(k,"^%d+$") then
+ elseif tk == "boolean" then
handle(format("%s [%s]=%s,",depth,tostring(k),tostring(v)))
- elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then
+ elseif noquotes and not reserved[k] and lpegmatch(propername,k) then
handle(format("%s %s=%s,",depth,k,tostring(v)))
else
handle(format("%s [%q]=%s,",depth,k,tostring(v)))
@@ -789,30 +1770,30 @@ local function do_serialize(root,name,depth,level,indexed)
if functions then
local f = getinfo(v).what == "C" and dump(dummy) or dump(v)
-- local f = getinfo(v).what == "C" and dump(function(...) return v(...) end) or dump(v)
- if tk == "number" then -- or find(k,"^%d+$") then
+ if tk == "number" then
if hexify then
- handle(format("%s [0x%04X]=loadstring(%q),",depth,k,f))
+ handle(format("%s [0x%04X]=load(%q),",depth,k,f))
else
- handle(format("%s [%s]=loadstring(%q),",depth,k,f))
+ handle(format("%s [%s]=load(%q),",depth,k,f))
end
elseif tk == "boolean" then
- handle(format("%s [%s]=loadstring(%q),",depth,tostring(k),f))
- elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then
- handle(format("%s %s=loadstring(%q),",depth,k,f))
+ handle(format("%s [%s]=load(%q),",depth,tostring(k),f))
+ elseif noquotes and not reserved[k] and lpegmatch(propername,k) then
+ handle(format("%s %s=load(%q),",depth,k,f))
else
- handle(format("%s [%q]=loadstring(%q),",depth,k,f))
+ handle(format("%s [%q]=load(%q),",depth,k,f))
end
end
else
- if tk == "number" then -- or find(k,"^%d+$") then
+ if tk == "number" then
if hexify then
handle(format("%s [0x%04X]=%q,",depth,k,tostring(v)))
else
handle(format("%s [%s]=%q,",depth,k,tostring(v)))
end
- elseif tk == "boolean" then -- or find(k,"^%d+$") then
+ elseif tk == "boolean" then
handle(format("%s [%s]=%q,",depth,tostring(k),tostring(v)))
- elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then
+ elseif noquotes and not reserved[k] and lpegmatch(propername,k) then
handle(format("%s %s=%q,",depth,k,tostring(v)))
else
handle(format("%s [%q]=%q,",depth,k,tostring(v)))
@@ -892,6 +1873,14 @@ local function serialize(_handle,root,name,specification) -- handle wins
handle("}")
end
+-- name:
+--
+-- true : return { }
+-- false : { }
+-- nil : t = { }
+-- string : string = { }
+-- "return" : return { }
+-- number : [number] = { }
function table.serialize(root,name,specification)
local t, n = { }, 0
@@ -980,7 +1969,7 @@ table.flattened = flattened
local function unnest(t,f) -- only used in mk, for old times sake
if not f then -- and only relevant for token lists
- f = { }
+ f = { } -- this one can become obsolete
end
for i=1,#t do
local v = t[i]
@@ -1009,7 +1998,7 @@ local function are_equal(a,b,n,m) -- indexed
local ai, bi = a[i], b[i]
if ai==bi then
-- same
- elseif type(ai)=="table" and type(bi)=="table" then
+ elseif type(ai) == "table" and type(bi) == "table" then
if not are_equal(ai,bi) then
return false
end
@@ -1044,10 +2033,10 @@ table.are_equal = are_equal
-- maybe also make a combined one
-function table.compact(t)
+function table.compact(t) -- remove empty tables, assumes subtables
if t then
- for k,v in next, t do
- if not next(v) then
+ for k, v in next, t do
+ if not next(v) then -- no type checking
t[k] = nil
end
end
@@ -1086,7 +2075,7 @@ function table.swapped(t,s) -- hash
return n
end
-function table.mirror(t) -- hash
+function table.mirrored(t) -- hash
local n = { }
for k, v in next, t do
n[v] = k
@@ -1109,6 +2098,17 @@ function table.reversed(t)
end
end
+function table.reverse(t)
+ if t then
+ local n = #t
+ for i=1,floor(n/2) do
+ local j = n - i + 1
+ t[i], t[j] = t[j], t[i]
+ end
+ return t
+ end
+end
+
function table.sequenced(t,sep) -- hash only
if t then
local s, n = { }, 0
@@ -1196,848 +2196,6 @@ end -- of closure
do -- create closure to overcome 200 locals limit
-if not modules then modules = { } end modules ['l-lpeg'] = {
- version = 1.001,
- comment = "companion to luat-lib.mkiv",
- author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
- copyright = "PRAGMA ADE / ConTeXt Development Team",
- license = "see context related readme files"
-}
-
-
--- a new lpeg fails on a #(1-P(":")) test and really needs a + P(-1)
-
-local lpeg = require("lpeg")
-
--- tracing (only used when we encounter a problem in integration of lpeg in luatex)
-
--- some code will move to unicode and string
-
-local report = texio and texio.write_nl or print
-
--- local lpmatch = lpeg.match
--- local lpprint = lpeg.print
--- local lpp = lpeg.P
--- local lpr = lpeg.R
--- local lps = lpeg.S
--- local lpc = lpeg.C
--- local lpb = lpeg.B
--- local lpv = lpeg.V
--- local lpcf = lpeg.Cf
--- local lpcb = lpeg.Cb
--- local lpcg = lpeg.Cg
--- local lpct = lpeg.Ct
--- local lpcs = lpeg.Cs
--- local lpcc = lpeg.Cc
--- local lpcmt = lpeg.Cmt
--- local lpcarg = lpeg.Carg
-
--- function lpeg.match(l,...) report("LPEG MATCH") lpprint(l) return lpmatch(l,...) end
-
--- function lpeg.P (l) local p = lpp (l) report("LPEG P =") lpprint(l) return p end
--- function lpeg.R (l) local p = lpr (l) report("LPEG R =") lpprint(l) return p end
--- function lpeg.S (l) local p = lps (l) report("LPEG S =") lpprint(l) return p end
--- function lpeg.C (l) local p = lpc (l) report("LPEG C =") lpprint(l) return p end
--- function lpeg.B (l) local p = lpb (l) report("LPEG B =") lpprint(l) return p end
--- function lpeg.V (l) local p = lpv (l) report("LPEG V =") lpprint(l) return p end
--- function lpeg.Cf (l) local p = lpcf (l) report("LPEG Cf =") lpprint(l) return p end
--- function lpeg.Cb (l) local p = lpcb (l) report("LPEG Cb =") lpprint(l) return p end
--- function lpeg.Cg (l) local p = lpcg (l) report("LPEG Cg =") lpprint(l) return p end
--- function lpeg.Ct (l) local p = lpct (l) report("LPEG Ct =") lpprint(l) return p end
--- function lpeg.Cs (l) local p = lpcs (l) report("LPEG Cs =") lpprint(l) return p end
--- function lpeg.Cc (l) local p = lpcc (l) report("LPEG Cc =") lpprint(l) return p end
--- function lpeg.Cmt (l) local p = lpcmt (l) report("LPEG Cmt =") lpprint(l) return p end
--- function lpeg.Carg (l) local p = lpcarg(l) report("LPEG Carg =") lpprint(l) return p end
-
-local type, next = type, next
-local byte, char, gmatch, format = string.byte, string.char, string.gmatch, string.format
-
--- Beware, we predefine a bunch of patterns here and one reason for doing so
--- is that we get consistent behaviour in some of the visualizers.
-
-lpeg.patterns = lpeg.patterns or { } -- so that we can share
-local patterns = lpeg.patterns
-
-local P, R, S, V, Ct, C, Cs, Cc, Cp = lpeg.P, lpeg.R, lpeg.S, lpeg.V, lpeg.Ct, lpeg.C, lpeg.Cs, lpeg.Cc, lpeg.Cp
-local lpegtype, lpegmatch = lpeg.type, lpeg.match
-
-local utfcharacters = string.utfcharacters
-local utfgmatch = unicode and unicode.utf8.gmatch
-
-local anything = P(1)
-local endofstring = P(-1)
-local alwaysmatched = P(true)
-
-patterns.anything = anything
-patterns.endofstring = endofstring
-patterns.beginofstring = alwaysmatched
-patterns.alwaysmatched = alwaysmatched
-
-local digit, sign = R('09'), S('+-')
-local cr, lf, crlf = P("\r"), P("\n"), P("\r\n")
-local newline = crlf + S("\r\n") -- cr + lf
-local escaped = P("\\") * anything
-local squote = P("'")
-local dquote = P('"')
-local space = P(" ")
-
-local utfbom_32_be = P('\000\000\254\255')
-local utfbom_32_le = P('\255\254\000\000')
-local utfbom_16_be = P('\255\254')
-local utfbom_16_le = P('\254\255')
-local utfbom_8 = P('\239\187\191')
-local utfbom = utfbom_32_be + utfbom_32_le
- + utfbom_16_be + utfbom_16_le
- + utfbom_8
-local utftype = utfbom_32_be / "utf-32-be" + utfbom_32_le / "utf-32-le"
- + utfbom_16_be / "utf-16-be" + utfbom_16_le / "utf-16-le"
- + utfbom_8 / "utf-8" + alwaysmatched / "unknown"
-
-local utf8next = R("\128\191")
-
-patterns.utf8one = R("\000\127")
-patterns.utf8two = R("\194\223") * utf8next
-patterns.utf8three = R("\224\239") * utf8next * utf8next
-patterns.utf8four = R("\240\244") * utf8next * utf8next * utf8next
-patterns.utfbom = utfbom
-patterns.utftype = utftype
-
-local utf8char = patterns.utf8one + patterns.utf8two + patterns.utf8three + patterns.utf8four
-local validutf8char = utf8char^0 * endofstring * Cc(true) + Cc(false)
-
-patterns.utf8 = utf8char
-patterns.utf8char = utf8char
-patterns.validutf8 = validutf8char
-patterns.validutf8char = validutf8char
-
-local eol = S("\n\r")
-local spacer = S(" \t\f\v") -- + char(0xc2, 0xa0) if we want utf (cf mail roberto)
-local whitespace = eol + spacer
-
-patterns.digit = digit
-patterns.sign = sign
-patterns.cardinal = sign^0 * digit^1
-patterns.integer = sign^0 * digit^1
-patterns.unsigned = digit^0 * P('.') * digit^1
-patterns.float = sign^0 * patterns.unsigned
-patterns.cunsigned = digit^0 * P(',') * digit^1
-patterns.cfloat = sign^0 * patterns.cunsigned
-patterns.number = patterns.float + patterns.integer
-patterns.cnumber = patterns.cfloat + patterns.integer
-patterns.oct = P("0") * R("07")^1
-patterns.octal = patterns.oct
-patterns.HEX = P("0x") * R("09","AF")^1
-patterns.hex = P("0x") * R("09","af")^1
-patterns.hexadecimal = P("0x") * R("09","AF","af")^1
-patterns.lowercase = R("az")
-patterns.uppercase = R("AZ")
-patterns.letter = patterns.lowercase + patterns.uppercase
-patterns.space = space
-patterns.tab = P("\t")
-patterns.spaceortab = patterns.space + patterns.tab
-patterns.eol = eol
-patterns.spacer = spacer
-patterns.whitespace = whitespace
-patterns.newline = newline
-patterns.emptyline = newline^1
-patterns.nonspacer = 1 - spacer
-patterns.nonwhitespace = 1 - whitespace
-patterns.equal = P("=")
-patterns.comma = P(",")
-patterns.commaspacer = P(",") * spacer^0
-patterns.period = P(".")
-patterns.colon = P(":")
-patterns.semicolon = P(";")
-patterns.underscore = P("_")
-patterns.escaped = escaped
-patterns.squote = squote
-patterns.dquote = dquote
-patterns.nosquote = (escaped + (1-squote))^0
-patterns.nodquote = (escaped + (1-dquote))^0
-patterns.unsingle = (squote/"") * patterns.nosquote * (squote/"")
-patterns.undouble = (dquote/"") * patterns.nodquote * (dquote/"")
-patterns.unquoted = patterns.undouble + patterns.unsingle -- more often undouble
-patterns.unspacer = ((patterns.spacer^1)/"")^0
-
-patterns.singlequoted = squote * patterns.nosquote * squote
-patterns.doublequoted = dquote * patterns.nodquote * dquote
-patterns.quoted = patterns.doublequoted + patterns.singlequoted
-
-patterns.somecontent = (anything - newline - space)^1 -- (utf8char - newline - space)^1
-patterns.beginline = #(1-newline)
-
--- print(string.unquoted("test"))
--- print(string.unquoted([["t\"est"]]))
--- print(string.unquoted([["t\"est"x]]))
--- print(string.unquoted("\'test\'"))
--- print(string.unquoted('"test"'))
--- print(string.unquoted('"test"'))
-
-local function anywhere(pattern) --slightly adapted from website
- return P { P(pattern) + 1 * V(1) }
-end
-
-lpeg.anywhere = anywhere
-
-function lpeg.instringchecker(p)
- p = anywhere(p)
- return function(str)
- return lpegmatch(p,str) and true or false
- end
-end
-
-function lpeg.splitter(pattern, action)
- return (((1-P(pattern))^1)/action+1)^0
-end
-
-function lpeg.tsplitter(pattern, action)
- return Ct((((1-P(pattern))^1)/action+1)^0)
-end
-
--- probleem: separator can be lpeg and that does not hash too well, but
--- it's quite okay as the key is then not garbage collected
-
-local splitters_s, splitters_m, splitters_t = { }, { }, { }
-
-local function splitat(separator,single)
- local splitter = (single and splitters_s[separator]) or splitters_m[separator]
- if not splitter then
- separator = P(separator)
- local other = C((1 - separator)^0)
- if single then
- local any = anything
- splitter = other * (separator * C(any^0) + "") -- ?
- splitters_s[separator] = splitter
- else
- splitter = other * (separator * other)^0
- splitters_m[separator] = splitter
- end
- end
- return splitter
-end
-
-local function tsplitat(separator)
- local splitter = splitters_t[separator]
- if not splitter then
- splitter = Ct(splitat(separator))
- splitters_t[separator] = splitter
- end
- return splitter
-end
-
-lpeg.splitat = splitat
-lpeg.tsplitat = tsplitat
-
-function string.splitup(str,separator)
- if not separator then
- separator = ","
- end
- return lpegmatch(splitters_m[separator] or splitat(separator),str)
-end
-
-
-local cache = { }
-
-function lpeg.split(separator,str)
- local c = cache[separator]
- if not c then
- c = tsplitat(separator)
- cache[separator] = c
- end
- return lpegmatch(c,str)
-end
-
-function string.split(str,separator)
- if separator then
- local c = cache[separator]
- if not c then
- c = tsplitat(separator)
- cache[separator] = c
- end
- return lpegmatch(c,str)
- else
- return { str }
- end
-end
-
-local spacing = patterns.spacer^0 * newline -- sort of strip
-local empty = spacing * Cc("")
-local nonempty = Cs((1-spacing)^1) * spacing^-1
-local content = (empty + nonempty)^1
-
-patterns.textline = content
-
-
-local linesplitter = tsplitat(newline)
-
-patterns.linesplitter = linesplitter
-
-function string.splitlines(str)
- return lpegmatch(linesplitter,str)
-end
-
-local utflinesplitter = utfbom^-1 * tsplitat(newline)
-
-patterns.utflinesplitter = utflinesplitter
-
-function string.utfsplitlines(str)
- return lpegmatch(utflinesplitter,str or "")
-end
-
-local utfcharsplitter_ows = utfbom^-1 * Ct(C(utf8char)^0)
-local utfcharsplitter_iws = utfbom^-1 * Ct((whitespace^1 + C(utf8char))^0)
-
-function string.utfsplit(str,ignorewhitespace) -- new
- if ignorewhitespace then
- return lpegmatch(utfcharsplitter_iws,str or "")
- else
- return lpegmatch(utfcharsplitter_ows,str or "")
- end
-end
-
--- inspect(string.utfsplit("a b c d"))
--- inspect(string.utfsplit("a b c d",true))
-
--- -- alternative 1: 0.77
---
--- local utfcharcounter = utfbom^-1 * Cs((utf8char/'!')^0)
---
--- function string.utflength(str)
--- return #lpegmatch(utfcharcounter,str or "")
--- end
---
--- -- alternative 2: 1.70
---
--- local n = 0
---
--- local utfcharcounter = utfbom^-1 * (utf8char/function() n = n + 1 end)^0 -- slow
---
--- function string.utflength(str)
--- n = 0
--- lpegmatch(utfcharcounter,str or "")
--- return n
--- end
---
--- -- alternative 3: 0.24 (native unicode.utf8.len: 0.047)
-
-local n = 0
-
-local utfcharcounter = utfbom^-1 * Cs ( (
- Cp() * (lpeg.patterns.utf8one )^1 * Cp() / function(f,t) n = n + t - f end
- + Cp() * (lpeg.patterns.utf8two )^1 * Cp() / function(f,t) n = n + (t - f)/2 end
- + Cp() * (lpeg.patterns.utf8three)^1 * Cp() / function(f,t) n = n + (t - f)/3 end
- + Cp() * (lpeg.patterns.utf8four )^1 * Cp() / function(f,t) n = n + (t - f)/4 end
-)^0 )
-
-function string.utflength(str)
- n = 0
- lpegmatch(utfcharcounter,str or "")
- return n
-end
-
-
-local cache = { }
-
-function lpeg.checkedsplit(separator,str)
- local c = cache[separator]
- if not c then
- separator = P(separator)
- local other = C((1 - separator)^1)
- c = Ct(separator^0 * other * (separator^1 * other)^0)
- cache[separator] = c
- end
- return lpegmatch(c,str)
-end
-
-function string.checkedsplit(str,separator)
- local c = cache[separator]
- if not c then
- separator = P(separator)
- local other = C((1 - separator)^1)
- c = Ct(separator^0 * other * (separator^1 * other)^0)
- cache[separator] = c
- end
- return lpegmatch(c,str)
-end
-
-
-local function f2(s) local c1, c2 = byte(s,1,2) return c1 * 64 + c2 - 12416 end
-local function f3(s) local c1, c2, c3 = byte(s,1,3) return (c1 * 64 + c2) * 64 + c3 - 925824 end
-local function f4(s) local c1, c2, c3, c4 = byte(s,1,4) return ((c1 * 64 + c2) * 64 + c3) * 64 + c4 - 63447168 end
-
-local utf8byte = patterns.utf8one/byte + patterns.utf8two/f2 + patterns.utf8three/f3 + patterns.utf8four/f4
-
-patterns.utf8byte = utf8byte
-
-
-
-local cache = { }
-
-function lpeg.stripper(str)
- if type(str) == "string" then
- local s = cache[str]
- if not s then
- s = Cs(((S(str)^1)/"" + 1)^0)
- cache[str] = s
- end
- return s
- else
- return Cs(((str^1)/"" + 1)^0)
- end
-end
-
-local cache = { }
-
-function lpeg.keeper(str)
- if type(str) == "string" then
- local s = cache[str]
- if not s then
- s = Cs((((1-S(str))^1)/"" + 1)^0)
- cache[str] = s
- end
- return s
- else
- return Cs((((1-str)^1)/"" + 1)^0)
- end
-end
-
-function lpeg.frontstripper(str) -- or pattern (yet undocumented)
- return (P(str) + P(true)) * Cs(anything^0)
-end
-
-function lpeg.endstripper(str) -- or pattern (yet undocumented)
- return Cs((1 - P(str) * endofstring)^0)
-end
-
--- Just for fun I looked at the used bytecode and
--- p = (p and p + pp) or pp gets one more (testset).
-
-function lpeg.replacer(one,two,makefunction)
- local pattern
- if type(one) == "table" then
- local no = #one
- local p = P(false)
- if no == 0 then
- for k, v in next, one do
- p = p + P(k) / v
- end
- pattern = Cs((p + 1)^0)
- elseif no == 1 then
- local o = one[1]
- one, two = P(o[1]), o[2]
- -- pattern = Cs(((1-one)^1 + one/two)^0)
- pattern = Cs((one/two + 1)^0)
- else
- for i=1,no do
- local o = one[i]
- p = p + P(o[1]) / o[2]
- end
- pattern = Cs((p + 1)^0)
- end
- else
- one = P(one)
- two = two or ""
- -- pattern = Cs(((1-one)^1 + one/two)^0)
- pattern = Cs((one/two +1)^0)
- end
- if makefunction then
- return function(str)
- return lpegmatch(pattern,str)
- end
- else
- return pattern
- end
-end
-
-function lpeg.finder(lst,makefunction)
- local pattern
- if type(lst) == "table" then
- local p = P(false)
- for i=1,#lst do
- p = p + P(lst[i])
- end
- pattern = (p + 1)^0
- else
- pattern = (P(lst) + 1)^0
- end
- if makefunction then
- return function(str)
- return lpegmatch(pattern,str)
- end
- else
- return pattern
- end
-end
-
--- print(lpeg.match(lpeg.replacer("e","a"),"test test"))
--- print(lpeg.match(lpeg.replacer{{"e","a"}},"test test"))
--- print(lpeg.match(lpeg.replacer({ e = "a", t = "x" }),"test test"))
-
-local splitters_f, splitters_s = { }, { }
-
-function lpeg.firstofsplit(separator) -- always return value
- local splitter = splitters_f[separator]
- if not splitter then
- separator = P(separator)
- splitter = C((1 - separator)^0)
- splitters_f[separator] = splitter
- end
- return splitter
-end
-
-function lpeg.secondofsplit(separator) -- nil if not split
- local splitter = splitters_s[separator]
- if not splitter then
- separator = P(separator)
- splitter = (1 - separator)^0 * separator * C(anything^0)
- splitters_s[separator] = splitter
- end
- return splitter
-end
-
-function lpeg.balancer(left,right)
- left, right = P(left), P(right)
- return P { left * ((1 - left - right) + V(1))^0 * right }
-end
-
-
-
-local nany = utf8char/""
-
-function lpeg.counter(pattern)
- pattern = Cs((P(pattern)/" " + nany)^0)
- return function(str)
- return #lpegmatch(pattern,str)
- end
-end
-
-if utfgmatch then
-
- function lpeg.count(str,what) -- replaces string.count
- if type(what) == "string" then
- local n = 0
- for _ in utfgmatch(str,what) do
- n = n + 1
- end
- return n
- else -- 4 times slower but still faster than / function
- return #lpegmatch(Cs((P(what)/" " + nany)^0),str)
- end
- end
-
-else
-
- local cache = { }
-
- function lpeg.count(str,what) -- replaces string.count
- if type(what) == "string" then
- local p = cache[what]
- if not p then
- p = Cs((P(what)/" " + nany)^0)
- cache[p] = p
- end
- return #lpegmatch(p,str)
- else -- 4 times slower but still faster than / function
- return #lpegmatch(Cs((P(what)/" " + nany)^0),str)
- end
- end
-
-end
-
-local patterns_escapes = { -- also defines in l-string
- ["%"] = "%%",
- ["."] = "%.",
- ["+"] = "%+", ["-"] = "%-", ["*"] = "%*",
- ["["] = "%[", ["]"] = "%]",
- ["("] = "%)", [")"] = "%)",
- -- ["{"] = "%{", ["}"] = "%}"
- -- ["^"] = "%^", ["$"] = "%$",
-}
-
-local simple_escapes = { -- also defines in l-string
- ["-"] = "%-",
- ["."] = "%.",
- ["?"] = ".",
- ["*"] = ".*",
-}
-
-local p = Cs((S("-.+*%()[]") / patterns_escapes + anything)^0)
-local s = Cs((S("-.+*%()[]") / simple_escapes + anything)^0)
-
-function string.escapedpattern(str,simple)
- return lpegmatch(simple and s or p,str)
-end
-
--- utf extensies
-
-lpeg.UP = lpeg.P
-
-if utfcharacters then
-
- function lpeg.US(str)
- local p = P(false)
- for uc in utfcharacters(str) do
- p = p + P(uc)
- end
- return p
- end
-
-
-elseif utfgmatch then
-
- function lpeg.US(str)
- local p = P(false)
- for uc in utfgmatch(str,".") do
- p = p + P(uc)
- end
- return p
- end
-
-else
-
- function lpeg.US(str)
- local p = P(false)
- local f = function(uc)
- p = p + P(uc)
- end
- lpegmatch((utf8char/f)^0,str)
- return p
- end
-
-end
-
-local range = utf8byte * utf8byte + Cc(false) -- utf8byte is already a capture
-
-local utfchar = unicode and unicode.utf8 and unicode.utf8.char
-
-function lpeg.UR(str,more)
- local first, last
- if type(str) == "number" then
- first = str
- last = more or first
- else
- first, last = lpegmatch(range,str)
- if not last then
- return P(str)
- end
- end
- if first == last then
- return P(str)
- elseif utfchar and (last - first < 8) then -- a somewhat arbitrary criterium
- local p = P(false)
- for i=first,last do
- p = p + P(utfchar(i))
- end
- return p -- nil when invalid range
- else
- local f = function(b)
- return b >= first and b <= last
- end
- -- tricky, these nested captures
- return utf8byte / f -- nil when invalid range
- end
-end
-
--- print(lpeg.match(lpeg.Cs((C(lpeg.UR("αω"))/{ ["χ"] = "OEPS" })^0),"αωχαω"))
-
-
-
-function lpeg.is_lpeg(p)
- return p and lpegtype(p) == "pattern"
-end
-
-function lpeg.oneof(list,...) -- lpeg.oneof("elseif","else","if","then") -- assume proper order
- if type(list) ~= "table" then
- list = { list, ... }
- end
- -- table.sort(list) -- longest match first
- local p = P(list[1])
- for l=2,#list do
- p = p + P(list[l])
- end
- return p
-end
-
--- For the moment here, but it might move to utilities. Beware, we need to
--- have the longest keyword first, so 'aaa' comes beforte 'aa' which is why we
--- loop back from the end cq. prepend.
-
-local sort, fastcopy, sortedkeys = table.sort, table.fastcopy, table.sortedkeys -- dependency!
-
-function lpeg.append(list,pp,delayed,checked)
- local p = pp
- if #list > 0 then
- local keys = fastcopy(list)
- sort(keys)
- for i=#keys,1,-1 do
- local k = keys[i]
- if p then
- p = P(k) + p
- else
- p = P(k)
- end
- end
- elseif delayed then -- hm, it looks like the lpeg parser resolves anyway
- local keys = sortedkeys(list)
- if p then
- for i=1,#keys,1 do
- local k = keys[i]
- local v = list[k]
- p = P(k)/list + p
- end
- else
- for i=1,#keys do
- local k = keys[i]
- local v = list[k]
- if p then
- p = P(k) + p
- else
- p = P(k)
- end
- end
- if p then
- p = p / list
- end
- end
- elseif checked then
- -- problem: substitution gives a capture
- local keys = sortedkeys(list)
- for i=1,#keys do
- local k = keys[i]
- local v = list[k]
- if p then
- if k == v then
- p = P(k) + p
- else
- p = P(k)/v + p
- end
- else
- if k == v then
- p = P(k)
- else
- p = P(k)/v
- end
- end
- end
- else
- local keys = sortedkeys(list)
- for i=1,#keys do
- local k = keys[i]
- local v = list[k]
- if p then
- p = P(k)/v + p
- else
- p = P(k)/v
- end
- end
- end
- return p
-end
-
--- inspect(lpeg.append({ a = "1", aa = "1", aaa = "1" } ,nil,true))
--- inspect(lpeg.append({ ["degree celsius"] = "1", celsius = "1", degree = "1" } ,nil,true))
-
--- function lpeg.exact_match(words,case_insensitive)
--- local pattern = concat(words)
--- if case_insensitive then
--- local pattern = S(upper(characters)) + S(lower(characters))
--- local list = { }
--- for i=1,#words do
--- list[lower(words[i])] = true
--- end
--- return Cmt(pattern^1, function(_,i,s)
--- return list[lower(s)] and i
--- end)
--- else
--- local pattern = S(concat(words))
--- local list = { }
--- for i=1,#words do
--- list[words[i]] = true
--- end
--- return Cmt(pattern^1, function(_,i,s)
--- return list[s] and i
--- end)
--- end
--- end
-
--- experiment:
-
-local function make(t)
- local p
--- for k, v in next, t do
- for k, v in table.sortedhash(t) do
- if not p then
- if next(v) then
- p = P(k) * make(v)
- else
- p = P(k)
- end
- else
- if next(v) then
- p = p + P(k) * make(v)
- else
- p = p + P(k)
- end
- end
- end
- return p
-end
-
-function lpeg.utfchartabletopattern(list)
- local tree = { }
- for i=1,#list do
- local t = tree
- for c in gmatch(list[i],".") do
- if not t[c] then
- t[c] = { }
- end
- t = t[c]
- end
- end
- return make(tree)
-end
-
--- inspect ( lpeg.utfchartabletopattern {
--- utfchar(0x00A0), -- nbsp
--- utfchar(0x2000), -- enquad
--- utfchar(0x2001), -- emquad
--- utfchar(0x2002), -- enspace
--- utfchar(0x2003), -- emspace
--- utfchar(0x2004), -- threeperemspace
--- utfchar(0x2005), -- fourperemspace
--- utfchar(0x2006), -- sixperemspace
--- utfchar(0x2007), -- figurespace
--- utfchar(0x2008), -- punctuationspace
--- utfchar(0x2009), -- breakablethinspace
--- utfchar(0x200A), -- hairspace
--- utfchar(0x200B), -- zerowidthspace
--- utfchar(0x202F), -- narrownobreakspace
--- utfchar(0x205F), -- math thinspace
--- } )
-
--- handy from within tex:
-
-local lpegmatch = lpeg.match
-
-local replacer = lpeg.replacer("@","%%") -- Watch the escaped % in lpeg!
-
-function string.tformat(fmt,...)
- return format(lpegmatch(replacer,fmt),...)
-end
-
--- strips leading and trailing spaces and collapsed all other spaces
-
-local pattern = Cs(whitespace^0/"" * ((whitespace^1 * P(-1) / "") + (whitespace^1/" ") + P(1))^0)
-
-function string.collapsespaces(str)
- return lpegmatch(pattern,str)
-end
-
-
-end -- of closure
-
-do -- create closure to overcome 200 locals limit
-
if not modules then modules = { } end modules ['l-io'] = {
version = 1.001,
comment = "companion to luat-lib.mkiv",
@@ -2368,7 +2526,7 @@ function io.readstring(f,n,m)
f:seek("set",n)
n = m
end
- local str = gsub(f:read(n),"%z","")
+ local str = gsub(f:read(n),"\000","")
return str
end
@@ -2418,10 +2576,129 @@ local lpegmatch = lpeg.match
number = number or { }
local number = number
--- a,b,c,d,e,f = number.toset(100101)
+if bit32 then
+
+ local btest, bor = bit32.btest, bit32.bor
+
+ function number.bit(p)
+ return 2 ^ (p - 1) -- 1-based indexing
+ end
+
+ number.hasbit = btest
+ number.setbit = bor
+
+ function number.setbit(x,p)
+ return btest(x,p) and x or x + p
+ end
+
+ function number.clearbit(x,p)
+ return btest(x,p) and x - p or x
+ end
+
+else
+
+ -- http://ricilake.blogspot.com/2007/10/iterating-bits-in-lua.html
+
+ function number.bit(p)
+ return 2 ^ (p - 1) -- 1-based indexing
+ end
+
+ function number.hasbit(x, p) -- typical call: if hasbit(x, bit(3)) then ...
+ return x % (p + p) >= p
+ end
+
+ function number.setbit(x, p)
+ return (x % (p + p) >= p) and x or x + p
+ end
+
+ function number.clearbit(x, p)
+ return (x % (p + p) >= p) and x - p or x
+ end
+
+end
+
+-- print(number.tobitstring(8))
+-- print(number.tobitstring(14))
+-- print(number.tobitstring(66))
+-- print(number.tobitstring(0x00))
+-- print(number.tobitstring(0xFF))
+-- print(number.tobitstring(46260767936,4))
+
+if bit32 then
+
+ local bextract = bit32.extract
+
+ local t = {
+ "0", "0", "0", "0", "0", "0", "0", "0",
+ "0", "0", "0", "0", "0", "0", "0", "0",
+ "0", "0", "0", "0", "0", "0", "0", "0",
+ "0", "0", "0", "0", "0", "0", "0", "0",
+ }
+
+ function number.tobitstring(b,m)
+ -- if really needed we can speed this one up
+ -- because small numbers need less extraction
+ local n = 32
+ for i=0,31 do
+ local v = bextract(b,i)
+ local k = 32 - i
+ if v == 1 then
+ n = k
+ t[k] = "1"
+ else
+ t[k] = "0"
+ end
+ end
+ if m then
+ m = 33 - m * 8
+ if m < 1 then
+ m = 1
+ end
+ return concat(t,"",m)
+ elseif n < 8 then
+ return concat(t)
+ elseif n < 16 then
+ return concat(t,"",9)
+ elseif n < 24 then
+ return concat(t,"",17)
+ else
+ return concat(t,"",25)
+ end
+ end
+
+else
+
+ function number.tobitstring(n,m)
+ if n > 0 then
+ local t = { }
+ while n > 0 do
+ insert(t,1,n % 2 > 0 and 1 or 0)
+ n = floor(n/2)
+ end
+ local nn = 8 - #t % 8
+ if nn > 0 and nn < 8 then
+ for i=1,nn do
+ insert(t,1,0)
+ end
+ end
+ if m then
+ m = m * 8 - #t
+ if m > 0 then
+ insert(t,1,rep("0",m))
+ end
+ end
+ return concat(t)
+ elseif m then
+ rep("00000000",m)
+ else
+ return "00000000"
+ end
+ end
-function number.toset(n)
- return match(tostring(n),"(.?)(.?)(.?)(.?)(.?)(.?)(.?)(.?)")
+end
+
+function number.valid(str,default)
+ return tonumber(str) or default or nil
end
function number.toevenhex(n)
@@ -2433,86 +2710,59 @@ function number.toevenhex(n)
end
end
--- the lpeg way is slower on 8 digits, but faster on 4 digits, some 7.5%
--- on
+-- a,b,c,d,e,f = number.toset(100101)
+--
+-- function number.toset(n)
+-- return match(tostring(n),"(.?)(.?)(.?)(.?)(.?)(.?)(.?)(.?)")
+-- end
+--
+-- -- the lpeg way is slower on 8 digits, but faster on 4 digits, some 7.5%
+-- -- on
--
-- for i=1,1000000 do
-- local a,b,c,d,e,f,g,h = number.toset(12345678)
-- local a,b,c,d = number.toset(1234)
-- local a,b,c = number.toset(123)
+-- local a,b,c = number.toset("123")
-- end
---
--- of course dedicated "(.)(.)(.)(.)" matches are even faster
-local one = lpeg.C(1-lpeg.S(''))^1
+local one = lpeg.C(1-lpeg.S('')/tonumber)^1
function number.toset(n)
return lpegmatch(one,tostring(n))
end
-function number.bits(n,zero)
- local t, i = { }, (zero and 0) or 1
- while n > 0 do
+-- function number.bits(n,zero)
+-- local t, i = { }, (zero and 0) or 1
+-- while n > 0 do
+-- local m = n % 2
+-- if m > 0 then
+-- insert(t,1,i)
+-- end
+-- n = floor(n/2)
+-- i = i + 1
+-- end
+-- return t
+-- end
+--
+-- -- a bit faster
+
+local function bits(n,i,...)
+ if n > 0 then
local m = n % 2
+ local n = floor(n/2)
if m > 0 then
- insert(t,1,i)
- end
- n = floor(n/2)
- i = i + 1
- end
- return t
-end
-
-
-function number.bit(p)
- return 2 ^ (p - 1) -- 1-based indexing
-end
-
-function number.hasbit(x, p) -- typical call: if hasbit(x, bit(3)) then ...
- return x % (p + p) >= p
-end
-
-function number.setbit(x, p)
- return (x % (p + p) >= p) and x or x + p
-end
-
-function number.clearbit(x, p)
- return (x % (p + p) >= p) and x - p or x
-end
-
-
-function number.tobitstring(n,m)
- if n == 0 then
- if m then
- rep("00000000",m)
+ return bits(n, i+1, i, ...)
else
- return "00000000"
+ return bits(n, i+1, ...)
end
else
- local t = { }
- while n > 0 do
- insert(t,1,n % 2 > 0 and 1 or 0)
- n = floor(n/2)
- end
- local nn = 8 - #t % 8
- if nn > 0 and nn < 8 then
- for i=1,nn do
- insert(t,1,0)
- end
- end
- if m then
- m = m * 8 - #t
- if m > 0 then
- insert(t,1,rep("0",m))
- end
- end
- return concat(t)
+ return ...
end
end
-
-function number.valid(str,default)
- return tonumber(str) or default or nil
+function number.bits(n)
+ return { bits(n,1) }
end
@@ -3104,25 +3354,25 @@ local suffix = period/"" * (1-period-slashes)^1 * -1
local pattern = C((noslashes^0 * slashes^1)^1)
local function pathpart(name,default)
- return lpegmatch(pattern,name) or default or ""
+ return name and lpegmatch(pattern,name) or default or ""
end
local pattern = (noslashes^0 * slashes)^1 * C(noslashes^1) * -1
local function basename(name)
- return lpegmatch(pattern,name) or name
+ return name and lpegmatch(pattern,name) or name
end
local pattern = (noslashes^0 * slashes^1)^0 * Cs((1-suffix)^1) * suffix^0
local function nameonly(name)
- return lpegmatch(pattern,name) or name
+ return name and lpegmatch(pattern,name) or name
end
local pattern = (noslashes^0 * slashes)^0 * (noperiod^1 * period)^1 * C(noperiod^1) * -1
local function suffixonly(name)
- return lpegmatch(pattern,name) or ""
+ return name and lpegmatch(pattern,name) or ""
end
file.pathpart = pathpart
@@ -3153,7 +3403,9 @@ local pattern_c = C(drive * path) * C(base * suffix) -- trick: two extra capture
local pattern_d = path * rest
function file.splitname(str,splitdrive)
- if splitdrive then
+ if not str then
+ -- error
+ elseif splitdrive then
return lpegmatch(pattern_a,str) -- returns drive, path, base, suffix
else
return lpegmatch(pattern_b,str) -- returns path, base, suffix
@@ -3161,34 +3413,36 @@ function file.splitname(str,splitdrive)
end
function file.splitbase(str)
- return lpegmatch(pattern_d,str) -- returns path, base+suffix
+ return str and lpegmatch(pattern_d,str) -- returns path, base+suffix
end
function file.nametotable(str,splitdrive) -- returns table
- local path, drive, subpath, name, base, suffix = lpegmatch(pattern_c,str)
- if splitdrive then
- return {
- path = path,
- drive = drive,
- subpath = subpath,
- name = name,
- base = base,
- suffix = suffix,
- }
- else
- return {
- path = path,
- name = name,
- base = base,
- suffix = suffix,
- }
+ if str then
+ local path, drive, subpath, name, base, suffix = lpegmatch(pattern_c,str)
+ if splitdrive then
+ return {
+ path = path,
+ drive = drive,
+ subpath = subpath,
+ name = name,
+ base = base,
+ suffix = suffix,
+ }
+ else
+ return {
+ path = path,
+ name = name,
+ base = base,
+ suffix = suffix,
+ }
+ end
end
end
local pattern = Cs(((period * noperiod^1 * -1)/"" + 1)^1)
function file.removesuffix(name)
- return lpegmatch(pattern,name)
+ return name and lpegmatch(pattern,name)
end
-- local pattern = (noslashes^0 * slashes)^0 * (noperiod^1 * period)^1 * Cp() * noperiod^1 * -1
@@ -3205,8 +3459,8 @@ end
local suffix = period/"" * (1-period-slashes)^1 * -1
local pattern = Cs((noslashes^0 * slashes^1)^0 * ((1-suffix)^1)) * Cs(suffix)
-function file.addsuffix(filename, suffix, criterium)
- if not suffix or suffix == "" then
+function file.addsuffix(filename,suffix,criterium)
+ if not filename or not suffix or suffix == "" then
return filename
elseif criterium == true then
return filename .. "." .. suffix
@@ -3252,7 +3506,7 @@ local suffix = period * (1-period-slashes)^1 * -1
local pattern = Cs((1-suffix)^0)
function file.replacesuffix(name,suffix)
- if suffix and suffix ~= "" then
+ if name and suffix and suffix ~= "" then
return lpegmatch(pattern,name) .. "." .. suffix
else
return name
@@ -3261,10 +3515,10 @@ end
--
-local reslasher = lpeg.replacer(S("\\"),"/")
+local reslasher = lpeg.replacer(P("\\"),"/")
function file.reslash(str)
- return lpegmatch(reslasher,str)
+ return str and lpegmatch(reslasher,str)
end
-- We should be able to use:
@@ -3280,7 +3534,9 @@ end
-- variant:
function file.is_writable(name)
- if lfs.isdir(name) then
+ if not name then
+ -- error
+ elseif lfs.isdir(name) then
name = name .. "/m_t_x_t_e_s_t.tmp"
local f = io.open(name,"wb")
if f then
@@ -3308,24 +3564,32 @@ end
local readable = P("r") * Cc(true)
function file.is_readable(name)
- local a = attributes(name)
- return a and lpegmatch(readable,a.permissions) or false
+ if name then
+ local a = attributes(name)
+ return a and lpegmatch(readable,a.permissions) or false
+ else
+ return false
+ end
end
file.isreadable = file.is_readable -- depricated
file.iswritable = file.is_writable -- depricated
function file.size(name)
- local a = attributes(name)
- return a and a.size or 0
+ if name then
+ local a = attributes(name)
+ return a and a.size or 0
+ else
+ return 0
+ end
end
function file.splitpath(str,separator) -- string .. reslash is a bonus (we could do a direct split)
- return checkedsplit(lpegmatch(reslasher,str),separator or io.pathseparator)
+ return str and checkedsplit(lpegmatch(reslasher,str),separator or io.pathseparator)
end
function file.joinpath(tab,separator) -- table
- return concat(tab,separator or io.pathseparator) -- can have trailing //
+ return tab and concat(tab,separator or io.pathseparator) -- can have trailing //
end
local stripper = Cs(P(fwslash)^0/"" * reslasher)
@@ -3333,14 +3597,23 @@ local isnetwork = fwslash * fwslash * (1-fwslash) + (1-fwslash-colon)^1 * colon
local isroot = fwslash^1 * -1
local hasroot = fwslash^1
-function file.join(...) -- rather dirty
+local deslasher = lpeg.replacer(S("\\/")^1,"/")
+
+-- If we have a network or prefix then there is a change that we end up with two
+-- // in the middle ... we could prevent this if we (1) expand prefixes: and (2)
+-- split and rebuild as url. Of course we could assume no network paths (which
+-- makes sense) adn assume either mapped drives (windows) or mounts (unix) but
+-- then we still have to deal with urls ... anyhow, multiple // are never a real
+-- problem but just ugly.
+
+function file.join(...)
local lst = { ... }
local one = lst[1]
if lpegmatch(isnetwork,one) then
- local two = lpegmatch(reslasher,concat(lst,"/",2))
+ local two = lpegmatch(deslasher,concat(lst,"/",2))
return one .. "/" .. two
elseif lpegmatch(isroot,one) then
- local two = lpegmatch(reslasher,concat(lst,"/",2))
+ local two = lpegmatch(deslasher,concat(lst,"/",2))
if lpegmatch(hasroot,two) then
return two
else
@@ -3349,7 +3622,7 @@ function file.join(...) -- rather dirty
elseif one == "" then
return lpegmatch(stripper,concat(lst,"/",2))
else
- return lpegmatch(reslasher,concat(lst,"/"))
+ return lpegmatch(deslasher,concat(lst,"/"))
end
end
@@ -3378,6 +3651,9 @@ local splitstarter = (Cs(drivespec * (bwslash/"/" + fwslash)^0) + Cc(false)) * C
local absolute = fwslash
function file.collapsepath(str,anchor)
+ if not str then
+ return
+ end
if anchor and not lpegmatch(anchors,str) then
str = getcurrentdir() .. "/" .. str
end
@@ -3387,7 +3663,6 @@ function file.collapsepath(str,anchor)
return lpegmatch(reslasher,str)
end
local starter, oldelements = lpegmatch(splitstarter,str)
--- inspect(oldelements)
local newelements = { }
local i = #oldelements
while i > 0 do
@@ -3441,11 +3716,13 @@ local whatever = P("-")^0 / ""
local pattern_b = Cs(whatever * (1 - whatever * -1)^1)
function file.robustname(str,strict)
- str = lpegmatch(pattern_a,str) or str
- if strict then
- return lpegmatch(pattern_b,str) or str -- two step is cleaner (less backtracking)
- else
- return str
+ if str then
+ str = lpegmatch(pattern_a,str) or str
+ if strict then
+ return lpegmatch(pattern_b,str) or str -- two step is cleaner (less backtracking)
+ else
+ return str
+ end
end
end
@@ -3453,7 +3730,9 @@ file.readdata = io.loaddata
file.savedata = io.savedata
function file.copy(oldname,newname)
- file.savedata(newname,io.loaddata(oldname))
+ if oldname and newname then
+ file.savedata(newname,io.loaddata(oldname))
+ end
end
-- also rewrite previous
@@ -3474,11 +3753,11 @@ lpeg.patterns.rootbased = rootbased
-- ./name ../name /name c: :// name/name
function file.is_qualified_path(filename)
- return lpegmatch(qualified,filename) ~= nil
+ return filename and lpegmatch(qualified,filename) ~= nil
end
function file.is_rootbased_path(filename)
- return lpegmatch(rootbased,filename) ~= nil
+ return filename and lpegmatch(rootbased,filename) ~= nil
end
-- function test(t) for k, v in next, t do print(v, "=>", file.splitname(v)) end end
@@ -3500,8 +3779,10 @@ end
-- for myself:
function file.strip(name,dir)
- local b, a = match(name,"^(.-)" .. dir .. "(.*)$")
- return a ~= "" and a or name
+ if name then
+ local b, a = match(name,"^(.-)" .. dir .. "(.*)$")
+ return a ~= "" and a or name
+ end
end
-- local debuglist = {
@@ -3943,7 +4224,7 @@ if not modules then modules = { } end modules ['l-dir'] = {
-- dir.expandname will be merged with cleanpath and collapsepath
-local type = type
+local type, select = type, select
local find, gmatch, match, gsub = string.find, string.gmatch, string.match, string.gsub
local concat, insert, remove = table.concat, table.insert, table.remove
local lpegmatch = lpeg.match
@@ -4165,15 +4446,15 @@ local onwindows = os.type == "windows" or find(os.getenv("PATH"),";")
if onwindows then
function dir.mkdirs(...)
- local str, pth, t = "", "", { ... }
- for i=1,#t do
- local s = t[i]
- if s ~= "" then
- if str ~= "" then
- str = str .. "/" .. s
- else
- str = s
- end
+ local str, pth = "", ""
+ for i=1,select("#",...) do
+ local s = select(i,...)
+ if s == "" then
+ -- skip
+ elseif str == "" then
+ str = s
+ else
+ str = str .. "/" .. s
end
end
local first, middle, last
@@ -4222,9 +4503,9 @@ if onwindows then
else
function dir.mkdirs(...)
- local str, pth, t = "", "", { ... }
- for i=1,#t do
- local s = t[i]
+ local str, pth = "", ""
+ for i=1,select("#",...) do
+ local s = select(i,...)
if s and s ~= "" then -- we catch nil and false
if str ~= "" then
str = str .. "/" .. s
@@ -4424,29 +4705,45 @@ if not modules then modules = { } end modules ['l-unicode'] = {
-- todo: utf.sub replacement (used in syst-aux)
-local concat = table.concat
+-- we put these in the utf namespace:
+
+utf = utf or (unicode and unicode.utf8) or { }
+
+utf.characters = utf.characters or string.utfcharacters
+utf.values = utf.values or string.utfvalues
+
+-- string.utfvalues
+-- string.utfcharacters
+-- string.characters
+-- string.characterpairs
+-- string.bytes
+-- string.bytepairs
+
local type = type
-local P, C, R, Cs, Ct, Cmt = lpeg.P, lpeg.C, lpeg.R, lpeg.Cs, lpeg.Ct, lpeg.Cmt
+local char, byte, format, sub = string.char, string.byte, string.format, string.sub
+local concat = table.concat
+local P, C, R, Cs, Ct, Cmt, Cc, Carg = lpeg.P, lpeg.C, lpeg.R, lpeg.Cs, lpeg.Ct, lpeg.Cmt, lpeg.Cc, lpeg.Carg
local lpegmatch, patterns = lpeg.match, lpeg.patterns
-local utftype = patterns.utftype
-local char, byte, find, bytepairs, utfvalues, format, sub = string.char, string.byte, string.find, string.bytepairs, string.utfvalues, string.format, string.sub
-local utfsplitlines = string.utfsplitlines
-if not unicode then
+local bytepairs = string.bytepairs
- unicode = { }
+local finder = lpeg.finder
+local replacer = lpeg.replacer
-end
+local utfvalues = utf.values
+local utfgmatch = utf.gmatch -- not always present
-local unicode = unicode
+local p_utftype = patterns.utftype
+local p_utfoffset = patterns.utfoffset
+local p_utf8char = patterns.utf8char
+local p_utf8byte = patterns.utf8byte
+local p_utfbom = patterns.utfbom
+local p_newline = patterns.newline
+local p_whitespace = patterns.whitespace
-utf = utf or unicode.utf8
-
-if not utf then
+if not unicode then
- utf8 = { }
- unicode.utf8 = utf8
- utf = utf8
+ unicode = { utf = utf } -- for a while
end
@@ -4503,64 +4800,13 @@ if not utf.byte then
end
-if not utf.sub then
-
- local utf8char = patterns.utf8char
-
- -- inefficient as lpeg just copies ^n
-
- -- local function sub(str,start,stop)
- -- local pattern = utf8char^-(start-1) * C(utf8char^-(stop-start+1))
- -- inspect(pattern)
- -- return lpegmatch(pattern,str) or ""
- -- end
-
- local b, e, n, first, last = 0, 0, 0, 0, 0
-
- local function slide(s,p)
- n = n + 1
- if n == first then
- b = p
- if not last then
- return nil
- end
- end
- if n == last then
- e = p
- return nil
- else
- return p
- end
- end
-
- local pattern = Cmt(utf8char,slide)^0
-
- function utf.sub(str,start,stop) -- todo: from the end
- if not start then
- return str
- end
- b, e, n, first, last = 0, 0, 0, start, stop
- lpegmatch(pattern,str)
- if not stop then
- return sub(str,b)
- else
- return sub(str,b,e)
- end
- end
-
- -- print(utf.sub("Hans Hagen is my name"))
- -- print(utf.sub("Hans Hagen is my name",5))
- -- print(utf.sub("Hans Hagen is my name",5,10))
-
-end
-
local utfchar, utfbyte = utf.char, utf.byte
-- As we want to get rid of the (unmaintained) utf library we implement our own
-- variants (in due time an independent module):
-function unicode.filetype(data)
- return data and lpegmatch(utftype,data) or "unknown"
+function utf.filetype(data)
+ return data and lpegmatch(p_utftype,data) or "unknown"
end
local toentities = Cs (
@@ -4647,7 +4893,7 @@ local pattern = P("\254\255") * Cs( (
+ one
)^1 )
-function string.toutf(s)
+function string.toutf(s) -- in string namespace
return lpegmatch(pattern,s) or s -- todo: utf32
end
@@ -4663,26 +4909,269 @@ local validatedutf = Cs (
patterns.validatedutf = validatedutf
-function string.validutf(str)
- return lpegmatch(validatedutf,str)
+function utf.is_valid(str)
+ return type(str) == "string" and lpegmatch(validatedutf,str) or false
end
+if not utf.len then
-utf.length = string.utflength
-utf.split = string.utfsplit
-utf.splitines = string.utfsplitlines
-utf.valid = string.validutf
+ -- -- alternative 1: 0.77
+ --
+ -- local utfcharcounter = utfbom^-1 * Cs((p_utf8char/'!')^0)
+ --
+ -- function utf.len(str)
+ -- return #lpegmatch(utfcharcounter,str or "")
+ -- end
+ --
+ -- -- alternative 2: 1.70
+ --
+ -- local n = 0
+ --
+ -- local utfcharcounter = utfbom^-1 * (p_utf8char/function() n = n + 1 end)^0 -- slow
+ --
+ -- function utf.length(str)
+ -- n = 0
+ -- lpegmatch(utfcharcounter,str or "")
+ -- return n
+ -- end
+ --
+ -- -- alternative 3: 0.24 (native unicode.utf8.len: 0.047)
+
+ -- local n = 0
+ --
+ -- -- local utfcharcounter = lpeg.patterns.utfbom^-1 * P ( ( Cp() * (
+ -- -- patterns.utf8one ^1 * Cc(1)
+ -- -- + patterns.utf8two ^1 * Cc(2)
+ -- -- + patterns.utf8three^1 * Cc(3)
+ -- -- + patterns.utf8four ^1 * Cc(4) ) * Cp() / function(f,d,t) n = n + (t - f)/d end
+ -- -- )^0 ) -- just as many captures as below
+ --
+ -- -- local utfcharcounter = lpeg.patterns.utfbom^-1 * P ( (
+ -- -- (Cmt(patterns.utf8one ^1,function(_,_,s) n = n + #s return true end))
+ -- -- + (Cmt(patterns.utf8two ^1,function(_,_,s) n = n + #s/2 return true end))
+ -- -- + (Cmt(patterns.utf8three^1,function(_,_,s) n = n + #s/3 return true end))
+ -- -- + (Cmt(patterns.utf8four ^1,function(_,_,s) n = n + #s/4 return true end))
+ -- -- )^0 ) -- not interesting as it creates strings but sometimes faster
+ --
+ -- -- The best so far:
+ --
+ -- local utfcharcounter = utfbom^-1 * P ( (
+ -- Cp() * (patterns.utf8one )^1 * Cp() / function(f,t) n = n + t - f end
+ -- + Cp() * (patterns.utf8two )^1 * Cp() / function(f,t) n = n + (t - f)/2 end
+ -- + Cp() * (patterns.utf8three)^1 * Cp() / function(f,t) n = n + (t - f)/3 end
+ -- + Cp() * (patterns.utf8four )^1 * Cp() / function(f,t) n = n + (t - f)/4 end
+ -- )^0 )
+
+ -- function utf.len(str)
+ -- n = 0
+ -- lpegmatch(utfcharcounter,str or "")
+ -- return n
+ -- end
+
+ local n, f = 0, 1
+
+ local utfcharcounter = patterns.utfbom^-1 * Cmt (
+ Cc(1) * patterns.utf8one ^1
+ + Cc(2) * patterns.utf8two ^1
+ + Cc(3) * patterns.utf8three^1
+ + Cc(4) * patterns.utf8four ^1,
+ function(_,t,d) -- due to Cc no string captures, so faster
+ n = n + (t - f)/d
+ f = t
+ return true
+ end
+ )^0
+
+ function utf.len(str)
+ n, f = 0, 1
+ lpegmatch(utfcharcounter,str or "")
+ return n
+ end
-if not utf.len then
- utf.len = utf.length
end
--- a replacement for simple gsubs:
+utf.length = utf.len
+
+if not utf.sub then
+
+ -- inefficient as lpeg just copies ^n
+
+ -- local function sub(str,start,stop)
+ -- local pattern = p_utf8char^-(start-1) * C(p_utf8char^-(stop-start+1))
+ -- inspect(pattern)
+ -- return lpegmatch(pattern,str) or ""
+ -- end
+
+ -- local b, e, n, first, last = 0, 0, 0, 0, 0
+ --
+ -- local function slide(s,p)
+ -- n = n + 1
+ -- if n == first then
+ -- b = p
+ -- if not last then
+ -- return nil
+ -- end
+ -- end
+ -- if n == last then
+ -- e = p
+ -- return nil
+ -- else
+ -- return p
+ -- end
+ -- end
+ --
+ -- local pattern = Cmt(p_utf8char,slide)^0
+ --
+ -- function utf.sub(str,start,stop) -- todo: from the end
+ -- if not start then
+ -- return str
+ -- end
+ -- b, e, n, first, last = 0, 0, 0, start, stop
+ -- lpegmatch(pattern,str)
+ -- if not stop then
+ -- return sub(str,b)
+ -- else
+ -- return sub(str,b,e-1)
+ -- end
+ -- end
+
+ -- print(utf.sub("Hans Hagen is my name"))
+ -- print(utf.sub("Hans Hagen is my name",5))
+ -- print(utf.sub("Hans Hagen is my name",5,10))
+
+ local utflength = utf.length
+
+ -- also negative indices, upto 10 times slower than a c variant
+
+ local b, e, n, first, last = 0, 0, 0, 0, 0
+
+ local function slide_zero(s,p)
+ n = n + 1
+ if n >= last then
+ e = p - 1
+ else
+ return p
+ end
+ end
+
+ local function slide_one(s,p)
+ n = n + 1
+ if n == first then
+ b = p
+ end
+ if n >= last then
+ e = p - 1
+ else
+ return p
+ end
+ end
+
+ local function slide_two(s,p)
+ n = n + 1
+ if n == first then
+ b = p
+ else
+ return true
+ end
+ end
+
+ local pattern_zero = Cmt(p_utf8char,slide_zero)^0
+ local pattern_one = Cmt(p_utf8char,slide_one )^0
+ local pattern_two = Cmt(p_utf8char,slide_two )^0
+
+ function utf.sub(str,start,stop)
+ if not start then
+ return str
+ end
+ if start == 0 then
+ start = 1
+ end
+ if not stop then
+ if start < 0 then
+ local l = utflength(str) -- we can inline this function if needed
+ start = l + start
+ else
+ start = start - 1
+ end
+ b, n, first = 0, 0, start
+ lpegmatch(pattern_two,str)
+ if n >= first then
+ return sub(str,b)
+ else
+ return ""
+ end
+ end
+ if start < 0 or stop < 0 then
+ local l = utf.length(str)
+ if start < 0 then
+ start = l + start
+ if start <= 0 then
+ start = 1
+ else
+ start = start + 1
+ end
+ end
+ if stop < 0 then
+ stop = l + stop
+ if stop == 0 then
+ stop = 1
+ else
+ stop = stop + 1
+ end
+ end
+ end
+ if start > stop then
+ return ""
+ elseif start > 1 then
+ b, e, n, first, last = 0, 0, 0, start - 1, stop
+ lpegmatch(pattern_one,str)
+ if n >= first and e == 0 then
+ e = #str
+ end
+ return sub(str,b,e)
+ else
+ b, e, n, last = 1, 0, 0, stop
+ lpegmatch(pattern_zero,str)
+ if e == 0 then
+ e = #str
+ end
+ return sub(str,b,e)
+ end
+ end
-local utf8char = patterns.utf8char
+ -- local n = 100000
+ -- local str = string.rep("123456àáâãäå",100)
+ --
+ -- for i=-15,15,1 do
+ -- for j=-15,15,1 do
+ -- if utf.xsub(str,i,j) ~= utf.sub(str,i,j) then
+ -- print("error",i,j,"l>"..utf.xsub(str,i,j),"s>"..utf.sub(str,i,j))
+ -- end
+ -- end
+ -- if utf.xsub(str,i) ~= utf.sub(str,i) then
+ -- print("error",i,"l>"..utf.xsub(str,i),"s>"..utf.sub(str,i))
+ -- end
+ -- end
+
+ -- print(" 1, 7",utf.xsub(str, 1, 7),utf.sub(str, 1, 7))
+ -- print(" 0, 7",utf.xsub(str, 0, 7),utf.sub(str, 0, 7))
+ -- print(" 0, 9",utf.xsub(str, 0, 9),utf.sub(str, 0, 9))
+ -- print(" 4 ",utf.xsub(str, 4 ),utf.sub(str, 4 ))
+ -- print(" 0 ",utf.xsub(str, 0 ),utf.sub(str, 0 ))
+ -- print(" 0, 0",utf.xsub(str, 0, 0),utf.sub(str, 0, 0))
+ -- print(" 4, 4",utf.xsub(str, 4, 4),utf.sub(str, 4, 4))
+ -- print(" 4, 0",utf.xsub(str, 4, 0),utf.sub(str, 4, 0))
+ -- print("-3, 0",utf.xsub(str,-3, 0),utf.sub(str,-3, 0))
+ -- print(" 0,-3",utf.xsub(str, 0,-3),utf.sub(str, 0,-3))
+ -- print(" 5,-3",utf.xsub(str,-5,-3),utf.sub(str,-5,-3))
+ -- print("-3 ",utf.xsub(str,-3 ),utf.sub(str,-3 ))
+
+end
+
+-- a replacement for simple gsubs:
function utf.remapper(mapping)
- local pattern = Cs((utf8char/mapping)^0)
+ local pattern = Cs((p_utf8char/mapping)^0)
return function(str)
if not str or str == "" then
return ""
@@ -4695,55 +5184,113 @@ end
-- local remap = utf.remapper { a = 'd', b = "c", c = "b", d = "a" }
-- print(remap("abcd 1234 abcd"))
+--
+
+function utf.replacer(t) -- no precheck, always string builder
+ local r = replacer(t,false,false,true)
+ return function(str)
+ return lpegmatch(r,str)
+ end
+end
+
+function utf.subtituter(t) -- with precheck and no building if no match
+ local f = finder (t)
+ local r = replacer(t,false,false,true)
+ return function(str)
+ local i = lpegmatch(f,str)
+ if not i then
+ return str
+ elseif i > #str then
+ return str
+ else
+ -- return sub(str,1,i-2) .. lpegmatch(r,str,i-1) -- slower
+ return lpegmatch(r,str)
+ end
+ end
+end
+
+-- inspect(utf.split("a b c d"))
+-- inspect(utf.split("a b c d",true))
+
+local utflinesplitter = p_utfbom^-1 * lpeg.tsplitat(p_newline)
+local utfcharsplitter_ows = p_utfbom^-1 * Ct(C(p_utf8char)^0)
+local utfcharsplitter_iws = p_utfbom^-1 * Ct((p_whitespace^1 + C(p_utf8char))^0)
+local utfcharsplitter_raw = Ct(C(p_utf8char)^0)
+
+patterns.utflinesplitter = utflinesplitter
+
+function utf.splitlines(str)
+ return lpegmatch(utflinesplitter,str or "")
+end
+
+function utf.split(str,ignorewhitespace) -- new
+ if ignorewhitespace then
+ return lpegmatch(utfcharsplitter_iws,str or "")
+ else
+ return lpegmatch(utfcharsplitter_ows,str or "")
+ end
+end
+
+function utf.totable(str) -- keeps bom
+ return lpegmatch(utfcharsplitter_raw,str)
+end
+
-- 0 EF BB BF UTF-8
-- 1 FF FE UTF-16-little-endian
-- 2 FE FF UTF-16-big-endian
-- 3 FF FE 00 00 UTF-32-little-endian
-- 4 00 00 FE FF UTF-32-big-endian
-
-unicode.utfname = {
- [0] = 'utf-8',
- [1] = 'utf-16-le',
- [2] = 'utf-16-be',
- [3] = 'utf-32-le',
- [4] = 'utf-32-be'
-}
-
+--
-- \000 fails in <= 5.0 but is valid in >=5.1 where %z is depricated
-function unicode.utftype(f)
- local str = f:read(4)
- if not str then
- f:seek('set')
- return 0
- -- elseif find(str,"^%z%z\254\255") then -- depricated
- -- elseif find(str,"^\000\000\254\255") then -- not permitted and bugged
- elseif find(str,"\000\000\254\255",1,true) then -- seems to work okay (TH)
- return 4
- -- elseif find(str,"^\255\254%z%z") then -- depricated
- -- elseif find(str,"^\255\254\000\000") then -- not permitted and bugged
- elseif find(str,"\255\254\000\000",1,true) then -- seems to work okay (TH)
- return 3
- elseif find(str,"^\254\255") then
- f:seek('set',2)
- return 2
- elseif find(str,"^\255\254") then
- f:seek('set',2)
- return 1
- elseif find(str,"^\239\187\191") then
- f:seek('set',3)
- return 0
- else
- f:seek('set')
- return 0
+-- utf.name = {
+-- [0] = 'utf-8',
+-- [1] = 'utf-16-le',
+-- [2] = 'utf-16-be',
+-- [3] = 'utf-32-le',
+-- [4] = 'utf-32-be'
+-- }
+--
+-- function utf.magic(f)
+-- local str = f:read(4)
+-- if not str then
+-- f:seek('set')
+-- return 0
+-- -- elseif find(str,"^%z%z\254\255") then -- depricated
+-- -- elseif find(str,"^\000\000\254\255") then -- not permitted and bugged
+-- elseif find(str,"\000\000\254\255",1,true) then -- seems to work okay (TH)
+-- return 4
+-- -- elseif find(str,"^\255\254%z%z") then -- depricated
+-- -- elseif find(str,"^\255\254\000\000") then -- not permitted and bugged
+-- elseif find(str,"\255\254\000\000",1,true) then -- seems to work okay (TH)
+-- return 3
+-- elseif find(str,"^\254\255") then
+-- f:seek('set',2)
+-- return 2
+-- elseif find(str,"^\255\254") then
+-- f:seek('set',2)
+-- return 1
+-- elseif find(str,"^\239\187\191") then
+-- f:seek('set',3)
+-- return 0
+-- else
+-- f:seek('set')
+-- return 0
+-- end
+-- end
+
+function utf.magic(f) -- not used
+ local str = f:read(4) or ""
+ local off = lpegmatch(p_utfoffset,str)
+ if off < 4 then
+ f:seek('set',off)
end
+ return lpegmatch(p_utftype,str)
end
-
-
local function utf16_to_utf8_be(t)
if type(t) == "string" then
- t = utfsplitlines(str)
+ t = lpegmatch(utflinesplitter,t)
end
local result = { } -- we reuse result
for i=1,#t do
@@ -4771,7 +5318,7 @@ end
local function utf16_to_utf8_le(t)
if type(t) == "string" then
- t = utfsplitlines(str)
+ t = lpegmatch(utflinesplitter,t)
end
local result = { } -- we reuse result
for i=1,#t do
@@ -4799,7 +5346,7 @@ end
local function utf32_to_utf8_be(t)
if type(t) == "string" then
- t = utfsplitlines(t)
+ t = lpegmatch(utflinesplitter,t)
end
local result = { } -- we reuse result
for i=1,#t do
@@ -4824,7 +5371,7 @@ end
local function utf32_to_utf8_le(t)
if type(t) == "string" then
- t = utfsplitlines(t)
+ t = lpegmatch(utflinesplitter,t)
end
local result = { } -- we reuse result
for i=1,#t do
@@ -4847,20 +5394,20 @@ local function utf32_to_utf8_le(t)
return t
end
-unicode.utf32_to_utf8_be = utf32_to_utf8_be
-unicode.utf32_to_utf8_le = utf32_to_utf8_le
-unicode.utf16_to_utf8_be = utf16_to_utf8_be
-unicode.utf16_to_utf8_le = utf16_to_utf8_le
+utf.utf32_to_utf8_be = utf32_to_utf8_be
+utf.utf32_to_utf8_le = utf32_to_utf8_le
+utf.utf16_to_utf8_be = utf16_to_utf8_be
+utf.utf16_to_utf8_le = utf16_to_utf8_le
-function unicode.utf8_to_utf8(t)
- return type(t) == "string" and utfsplitlines(t) or t
+function utf.utf8_to_utf8(t)
+ return type(t) == "string" and lpegmatch(utflinesplitter,t) or t
end
-function unicode.utf16_to_utf8(t,endian)
+function utf.utf16_to_utf8(t,endian)
return endian and utf16_to_utf8_be(t) or utf16_to_utf8_le(t) or t
end
-function unicode.utf32_to_utf8(t,endian)
+function utf.utf32_to_utf8(t,endian)
return endian and utf32_to_utf8_be(t) or utf32_to_utf8_le(t) or t
end
@@ -4886,7 +5433,7 @@ local function big(c)
end
end
--- function unicode.utf8_to_utf16(str,littleendian)
+-- function utf.utf8_to_utf16(str,littleendian)
-- if littleendian then
-- return char(255,254) .. utfgsub(str,".",little)
-- else
@@ -4897,7 +5444,7 @@ end
local _, l_remap = utf.remapper(little)
local _, b_remap = utf.remapper(big)
-function unicode.utf8_to_utf16(str,littleendian)
+function utf.utf8_to_utf16(str,littleendian)
if littleendian then
return char(255,254) .. lpegmatch(l_remap,str)
else
@@ -4905,31 +5452,71 @@ function unicode.utf8_to_utf16(str,littleendian)
end
end
-function unicode.utfcodes(str)
- local t, n = { }, 0
- for u in utfvalues(str) do
- n = n + 1
- t[n] = format("0x%04X",u)
- end
- return concat(t,separator or " ")
+-- function utf.tocodes(str,separator) -- can be sped up with an lpeg
+-- local t, n = { }, 0
+-- for u in utfvalues(str) do
+-- n = n + 1
+-- t[n] = format("0x%04X",u)
+-- end
+-- return concat(t,separator or " ")
+-- end
+
+local pattern = Cs (
+ (p_utf8byte / function(unicode ) return format( "0x%04X", unicode) end) *
+ (p_utf8byte * Carg(1) / function(unicode,separator) return format("%s0x%04X",separator,unicode) end)^0
+)
+
+function utf.tocodes(str,separator)
+ return lpegmatch(pattern,str,1,separator or " ")
end
-function unicode.ustring(s)
+function utf.ustring(s)
return format("U+%05X",type(s) == "number" and s or utfbyte(s))
end
-function unicode.xstring(s)
+function utf.xstring(s)
return format("0x%05X",type(s) == "number" and s or utfbyte(s))
end
--
-local pattern = Ct(C(patterns.utf8char)^0)
+local p_nany = p_utf8char / ""
+
+if utfgmatch then
+
+ function utf.count(str,what)
+ if type(what) == "string" then
+ local n = 0
+ for _ in utfgmatch(str,what) do
+ n = n + 1
+ end
+ return n
+ else -- 4 times slower but still faster than / function
+ return #lpegmatch(Cs((P(what)/" " + p_nany)^0),str)
+ end
+ end
+
+else
+
+ local cache = { }
+
+ function utf.count(str,what)
+ if type(what) == "string" then
+ local p = cache[what]
+ if not p then
+ p = Cs((P(what)/" " + p_nany)^0)
+ cache[p] = p
+ end
+ return #lpegmatch(p,str)
+ else -- 4 times slower but still faster than / function
+ return #lpegmatch(Cs((P(what)/" " + p_nany)^0),str)
+ end
+ end
-function utf.totable(str)
- return lpegmatch(pattern,str)
end
+-- maybe also register as string.utf*
+
end -- of closure
@@ -4990,24 +5577,10 @@ local tables = utilities.tables
local format, gmatch, rep, gsub = string.format, string.gmatch, string.rep, string.gsub
local concat, insert, remove = table.concat, table.insert, table.remove
local setmetatable, getmetatable, tonumber, tostring = setmetatable, getmetatable, tonumber, tostring
-local type, next, rawset, tonumber, loadstring = type, next, rawset, tonumber, loadstring
+local type, next, rawset, tonumber, load, select = type, next, rawset, tonumber, load, select
local lpegmatch, P, Cs = lpeg.match, lpeg.P, lpeg.Cs
local serialize = table.serialize
--- function tables.definetable(target) -- defines undefined tables
--- local composed, t, n = nil, { }, 0
--- for name in gmatch(target,"([^%.]+)") do
--- n = n + 1
--- if composed then
--- composed = composed .. "." .. name
--- else
--- composed = name
--- end
--- t[n] = format("%s = %s or { }",composed,composed)
--- end
--- return concat(t,"\n")
--- end
-
local splitter = lpeg.tsplitat(".")
function tables.definetable(target,nofirst,nolast) -- defines undefined tables
@@ -5036,13 +5609,13 @@ end
-- local t = tables.definedtable("a","b","c","d")
function tables.definedtable(...)
- local l = { ... }
local t = _G
- for i=1,#l do
- local tl = t[l[i]]
+ for i=1,select("#",...) do
+ local li = select(i,...)
+ local tl = t[li]
if not tl then
tl = { }
- t[l[i]] = tl
+ t[li] = tl
end
t = tl
end
@@ -5235,7 +5808,7 @@ function table.deserialize(str)
if not str or str == "" then
return
end
- local code = loadstring(str)
+ local code = load(str)
if not code then
return
end
@@ -5252,7 +5825,7 @@ function table.load(filename)
if filename then
local t = io.loaddata(filename)
if t and t ~= "" then
- t = loadstring(t)
+ t = load(t)
if type(t) == "function" then
t = t()
if type(t) == "table" then
@@ -5331,9 +5904,11 @@ utilities = utilities or { }
utilities.storage = utilities.storage or { }
local storage = utilities.storage
+local report = texio and texio.write_nl or print
+
function storage.mark(t)
if not t then
- texio.write_nl("fatal error: storage cannot be marked")
+ report("fatal error: storage cannot be marked")
return -- os.exit()
end
local m = getmetatable(t)
@@ -5363,12 +5938,36 @@ end
function storage.checked(t)
if not t then
- texio.write_nl("fatal error: storage has not been allocated")
+ report("fatal error: storage has not been allocated")
return -- os.exit()
end
return t
end
+-- function utilities.storage.delay(parent,name,filename)
+-- local m = getmetatable(parent)
+-- m.__list[name] = filename
+-- end
+--
+-- function utilities.storage.predefine(parent)
+-- local list = { }
+-- local m = getmetatable(parent) or {
+-- __list = list,
+-- __index = function(t,k)
+-- local l = require(list[k])
+-- t[k] = l
+-- return l
+-- end
+-- }
+-- setmetatable(parent,m)
+-- end
+--
+-- bla = { }
+-- utilities.storage.predefine(bla)
+-- utilities.storage.delay(bla,"test","oepsoeps")
+-- local t = bla.test
+-- table.print(t)
+-- print(t.a)
function storage.setinitializer(data,initialize)
local m = getmetatable(data) or { }
@@ -5393,12 +5992,14 @@ end
-- table namespace ?
-local function f_empty () return "" end -- t,k
-local function f_self (t,k) t[k] = k return k end
-local function f_ignore() end -- t,k,v
+local function f_empty () return "" end -- t,k
+local function f_self (t,k) t[k] = k return k end
+local function f_table (t,k) local v = { } t[k] = v return v end
+local function f_ignore() end -- t,k,v
local t_empty = { __index = f_empty }
local t_self = { __index = f_self }
+local t_table = { __index = f_table }
local t_ignore = { __newindex = f_ignore }
function table.setmetatableindex(t,f)
@@ -5408,6 +6009,8 @@ function table.setmetatableindex(t,f)
m.__index = f_empty
elseif f == "key" then
m.__index = f_self
+ elseif f == "table" then
+ m.__index = f_table
else
m.__index = f
end
@@ -5416,6 +6019,8 @@ function table.setmetatableindex(t,f)
setmetatable(t, t_empty)
elseif f == "key" then
setmetatable(t, t_self)
+ elseif f == "table" then
+ setmetatable(t, t_table)
else
setmetatable(t,{ __index = f })
end
@@ -5626,7 +6231,7 @@ if not modules then modules = { } end modules ['util-lua'] = {
}
local rep, sub, byte, dump, format = string.rep, string.sub, string.byte, string.dump, string.format
-local loadstring, loadfile, type = loadstring, loadfile, type
+local load, loadfile, type = load, loadfile, type
utilities = utilities or {}
utilities.lua = utilities.lua or { }
@@ -5643,11 +6248,23 @@ luautilities.nofstrippedbytes = 0
local strippedchunks = { } -- allocate()
luautilities.strippedchunks = strippedchunks
+luautilities.suffixes = {
+ tma = "tma",
+ tmc = jit and "tmb" or "tmc",
+ lua = "lua",
+ luc = jit and "lub" or "luc",
+ lui = "lui",
+ luv = "luv",
+ luj = "luj",
+ tua = "tua",
+ tuc = "tuc",
+}
+
local function fatalerror(name)
utilities.report(format("fatal error in %q",name or "unknown"))
end
-if jit then
+if jit or status.luatex_version >= 74 then
local function register(name)
if tracestripping then
@@ -5660,7 +6277,7 @@ if jit then
local function stupidcompile(luafile,lucfile,strip)
local code = io.loaddata(luafile)
if code and code ~= "" then
- code = loadstring(code)
+ code = load(code)
if code then
code = dump(code,strip and luautilities.stripcode or luautilities.alwaysstripcode)
if code and code ~= "" then
@@ -5692,13 +6309,13 @@ if jit then
end
if forcestrip or luautilities.alwaysstripcode then
register(name)
- return loadstring(dump(code,true)), 0
+ return load(dump(code,true)), 0
else
return code, 0
end
elseif luautilities.alwaysstripcode then
register(name)
- return loadstring(dump(code,true)), 0
+ return load(dump(code,true)), 0
else
return code, 0
end
@@ -5706,14 +6323,14 @@ if jit then
function luautilities.strippedloadstring(code,forcestrip,name) -- not executed
if forcestrip and luautilities.stripcode or luautilities.alwaysstripcode then
- code = loadstring(code)
+ code = load(code)
if not code then
fatalerror(name)
end
register(name)
code = dump(code,true)
end
- return loadstring(code), 0
+ return load(code), 0
end
function luautilities.compile(luafile,lucfile,cleanup,strip,fallback) -- defaults: cleanup=false strip=true
@@ -5754,67 +6371,79 @@ else
return delta
end
- local function strip_code_pc(dump,name)
- local before = #dump
- local version, format, endian, int, size, ins, num = byte(dump,5,11)
- local subint
- if endian == 1 then
- subint = function(dump, i, l)
- local val = 0
- for n = l, 1, -1 do
- val = val * 256 + byte(dump,i + n - 1)
+ local strip_code_pc
+
+ if _MAJORVERSION == 5 and _MINORVERSION == 1 then
+
+ strip_code_pc = function(dump,name)
+ local before = #dump
+ local version, format, endian, int, size, ins, num = byte(dump,5,11)
+ local subint
+ if endian == 1 then
+ subint = function(dump, i, l)
+ local val = 0
+ for n = l, 1, -1 do
+ val = val * 256 + byte(dump,i + n - 1)
+ end
+ return val, i + l
+ end
+ else
+ subint = function(dump, i, l)
+ local val = 0
+ for n = 1, l, 1 do
+ val = val * 256 + byte(dump,i + n - 1)
+ end
+ return val, i + l
end
- return val, i + l
end
- else
- subint = function(dump, i, l)
- local val = 0
- for n = 1, l, 1 do
- val = val * 256 + byte(dump,i + n - 1)
+ local strip_function
+ strip_function = function(dump)
+ local count, offset = subint(dump, 1, size)
+ local stripped, dirty = rep("\0", size), offset + count
+ offset = offset + count + int * 2 + 4
+ offset = offset + int + subint(dump, offset, int) * ins
+ count, offset = subint(dump, offset, int)
+ for n = 1, count do
+ local t
+ t, offset = subint(dump, offset, 1)
+ if t == 1 then
+ offset = offset + 1
+ elseif t == 4 then
+ offset = offset + size + subint(dump, offset, size)
+ elseif t == 3 then
+ offset = offset + num
+ end
end
- return val, i + l
- end
- end
- local strip_function
- strip_function = function(dump)
- local count, offset = subint(dump, 1, size)
- local stripped, dirty = rep("\0", size), offset + count
- offset = offset + count + int * 2 + 4
- offset = offset + int + subint(dump, offset, int) * ins
- count, offset = subint(dump, offset, int)
- for n = 1, count do
- local t
- t, offset = subint(dump, offset, 1)
- if t == 1 then
- offset = offset + 1
- elseif t == 4 then
- offset = offset + size + subint(dump, offset, size)
- elseif t == 3 then
- offset = offset + num
+ count, offset = subint(dump, offset, int)
+ stripped = stripped .. sub(dump,dirty, offset - 1)
+ for n = 1, count do
+ local proto, off = strip_function(sub(dump,offset, -1))
+ stripped, offset = stripped .. proto, offset + off - 1
end
+ offset = offset + subint(dump, offset, int) * int + int
+ count, offset = subint(dump, offset, int)
+ for n = 1, count do
+ offset = offset + subint(dump, offset, size) + size + int * 2
+ end
+ count, offset = subint(dump, offset, int)
+ for n = 1, count do
+ offset = offset + subint(dump, offset, size) + size
+ end
+ stripped = stripped .. rep("\0", int * 3)
+ return stripped, offset
end
- count, offset = subint(dump, offset, int)
- stripped = stripped .. sub(dump,dirty, offset - 1)
- for n = 1, count do
- local proto, off = strip_function(sub(dump,offset, -1))
- stripped, offset = stripped .. proto, offset + off - 1
- end
- offset = offset + subint(dump, offset, int) * int + int
- count, offset = subint(dump, offset, int)
- for n = 1, count do
- offset = offset + subint(dump, offset, size) + size + int * 2
- end
- count, offset = subint(dump, offset, int)
- for n = 1, count do
- offset = offset + subint(dump, offset, size) + size
- end
- stripped = stripped .. rep("\0", int * 3)
- return stripped, offset
+ dump = sub(dump,1,12) .. strip_function(sub(dump,13,-1))
+ local after = #dump
+ local delta = register(name,before,after)
+ return dump, delta
+ end
+
+ else
+
+ strip_code_pc = function(dump,name)
+ return dump, 0
end
- dump = sub(dump,1,12) .. strip_function(sub(dump,13,-1))
- local after = #dump
- local delta = register(name,before,after)
- return dump, delta
+
end
-- ... end of borrowed code.
@@ -5834,14 +6463,14 @@ else
end
if forcestrip then
local code, n = strip_code_pc(dump(code),name)
- return loadstring(code), n
+ return load(code), n
elseif luautilities.alwaysstripcode then
- return loadstring(strip_code_pc(dump(code),name))
+ return load(strip_code_pc(dump(code),name))
else
return code, 0
end
elseif luautilities.alwaysstripcode then
- return loadstring(strip_code_pc(dump(code),name))
+ return load(strip_code_pc(dump(code),name))
else
return code, 0
end
@@ -5850,20 +6479,20 @@ else
function luautilities.strippedloadstring(code,forcestrip,name) -- not executed
local n = 0
if (forcestrip and luautilities.stripcode) or luautilities.alwaysstripcode then
- code = loadstring(code)
+ code = load(code)
if not code then
fatalerror(name)
end
code, n = strip_code_pc(dump(code),name)
end
- return loadstring(code), n
+ return load(code), n
end
local function stupidcompile(luafile,lucfile,strip)
local code = io.loaddata(luafile)
local n = 0
if code and code ~= "" then
- code = loadstring(code)
+ code = load(code)
if not code then
fatalerror()
end
@@ -5903,6 +6532,7 @@ else
utilities.report("lua: %s dumped into %s (unstripped)",luafile,lucfile)
end
cleanup = false -- better see how bad it is
+ done = true -- hm
end
if done and cleanup == true and lfs.isfile(lucfile) and lfs.isfile(luafile) then
utilities.report("lua: removing %s",luafile)
@@ -5949,7 +6579,6 @@ if not modules then modules = { } end modules ['util-prs'] = {
}
local lpeg, table, string = lpeg, table, string
-
local P, R, V, S, C, Ct, Cs, Carg, Cc, Cg, Cf, Cp = lpeg.P, lpeg.R, lpeg.V, lpeg.S, lpeg.C, lpeg.Ct, lpeg.Cs, lpeg.Carg, lpeg.Cc, lpeg.Cg, lpeg.Cf, lpeg.Cp
local lpegmatch, patterns = lpeg.match, lpeg.patterns
local concat, format, gmatch, find = table.concat, string.format, string.gmatch, string.find
@@ -6367,6 +6996,45 @@ function parsers.rfc4180splitter(specification)
end
end
+-- utilities.parsers.stepper("1,7-",9,function(i) print(">>>",i) end)
+-- utilities.parsers.stepper("1-3,7,8,9")
+-- utilities.parsers.stepper("1-3,6,7",function(i) print(">>>",i) end)
+-- utilities.parsers.stepper(" 1 : 3, ,7 ")
+-- utilities.parsers.stepper("1:4,9:13,24:*",30)
+
+local function ranger(first,last,n,action)
+ if not first then
+ -- forget about it
+ elseif last == true then
+ for i=first,n or first do
+ action(i)
+ end
+ elseif last then
+ for i=first,last do
+ action(i)
+ end
+ else
+ action(first)
+ end
+end
+
+local cardinal = patterns.cardinal / tonumber
+local spacers = patterns.spacer^0
+local endofstring = patterns.endofstring
+
+local stepper = spacers * ( C(cardinal) * ( spacers * S(":-") * spacers * ( C(cardinal) + Cc(true) ) + Cc(false) )
+ * Carg(1) * Carg(2) / ranger * S(", ")^0 )^1
+
+local stepper = spacers * ( C(cardinal) * ( spacers * S(":-") * spacers * ( C(cardinal) + (P("*") + endofstring) * Cc(true) ) + Cc(false) )
+ * Carg(1) * Carg(2) / ranger * S(", ")^0 )^1 * endofstring -- we're sort of strict (could do without endofstring)
+
+function utilities.parsers.stepper(str,n,action)
+ if type(n) == "function" then
+ lpegmatch(stepper,str,1,false,n or print)
+ else
+ lpegmatch(stepper,str,1,n,action or print)
+ end
+end
end -- of closure
@@ -6817,7 +7485,7 @@ if not modules then modules = { } end modules ['trac-set'] = { -- might become u
local type, next, tostring = type, next, tostring
local concat = table.concat
-local format, find, lower, gsub, escapedpattern = string.format, string.find, string.lower, string.gsub, string.escapedpattern
+local format, find, lower, gsub, topattern = string.format, string.find, string.lower, string.gsub, string.topattern
local is_boolean = string.is_boolean
local settings_to_hash = utilities.parsers.settings_to_hash
local allocate = utilities.storage.allocate
@@ -6905,7 +7573,7 @@ local function set(t,what,newvalue)
else
value = is_boolean(value,value)
end
- w = "^" .. escapedpattern(w,true) .. "$" -- new: anchored
+ w = topattern(w,true,true)
for name, functions in next, data do
if done[name] then
-- prevent recursion due to wildcards
@@ -6959,7 +7627,8 @@ function setters.register(t,what,...)
end
end
local default = functions.default -- can be set from cnf file
- for _, fnc in next, { ... } do
+ for i=1,select("#",...) do
+ local fnc = select(i,...)
local typ = type(fnc)
if typ == "string" then
if trace_initialize then
@@ -7028,7 +7697,7 @@ function setters.show(t)
local value, default, modules = functions.value, functions.default, #functions
value = value == nil and "unset" or tostring(value)
default = default == nil and "unset" or tostring(default)
- t.report("%-50s modules: %2i default: %6s value: %6s",name,modules,default,value)
+ t.report("%-50s modules: %2i default: %-12s value: %-12s",name,modules,default,value)
end
end
t.report()
@@ -7052,17 +7721,29 @@ local function report(setter,...)
end
end
-function setters.new(name)
+local function default(setter,name)
+ local d = setter.data[name]
+ return d and d.default
+end
+
+local function value(setter,name)
+ local d = setter.data[name]
+ return d and (d.value or d.default)
+end
+
+function setters.new(name) -- we could use foo:bar syntax (but not used that often)
local setter -- we need to access it in setter itself
setter = {
data = allocate(), -- indexed, but also default and value fields
name = name,
- report = function(...) report (setter,...) end,
- enable = function(...) enable (setter,...) end,
- disable = function(...) disable (setter,...) end,
- register = function(...) register(setter,...) end,
- list = function(...) list (setter,...) end,
- show = function(...) show (setter,...) end,
+ report = function(...) report (setter,...) end,
+ enable = function(...) enable (setter,...) end,
+ disable = function(...) disable (setter,...) end,
+ register = function(...) register(setter,...) end,
+ list = function(...) list (setter,...) end,
+ show = function(...) show (setter,...) end,
+ default = function(...) return default (setter,...) end,
+ value = function(...) return value (setter,...) end,
}
data[name] = setter
return setter
@@ -7189,9 +7870,9 @@ if not modules then modules = { } end modules ['trac-log'] = {
local write_nl, write = texio and texio.write_nl or print, texio and texio.write or io.write
local format, gmatch, find = string.format, string.gmatch, string.find
local concat, insert, remove = table.concat, table.insert, table.remove
-local escapedpattern = string.escapedpattern
+local topattern = string.topattern
local texcount = tex and tex.count
-local next, type = next, type
+local next, type, select = next, type, select
local setmetatableindex = table.setmetatableindex
@@ -7502,7 +8183,7 @@ local function setblocked(category,value)
if data[c] then
v.state = value
else
- c = escapedpattern(c,true)
+ c = topattern(c,true,true)
for k, v in next, data do
if find(k,c) then
v.state = value
@@ -7720,10 +8401,10 @@ local function reporthelp(t,...)
if type(helpinfo) == "string" then
reportlines(t,helpinfo)
elseif type(helpinfo) == "table" then
- local tags = { ... }
- for i=1,#tags do
- reportlines(t,t.helpinfo[tags[i]])
- if i < #tags then
+ local n = select("#",...)
+ for i=1,n do
+ reportlines(t,t.helpinfo[select(i,...)])
+ if i < n then
t.report()
end
end
@@ -8013,6 +8694,10 @@ local format, sub, match, gsub, find = string.format, string.sub, string.match,
local unquoted, quoted = string.unquoted, string.quoted
local concat, insert, remove = table.concat, table.insert, table.remove
local loadedluacode = utilities.lua.loadedluacode
+local luasuffixes = utilities.lua.suffixes
+
+environment = environment or { }
+local environment = environment
-- precautions
@@ -8022,9 +8707,29 @@ function os.setlocale()
-- no way you can mess with it
end
--- dirty tricks
+-- dirty tricks (we will replace the texlua call by luatex --luaonly)
-if arg and (arg[0] == 'luatex' or arg[0] == 'luatex.exe') and arg[1] == "--luaonly" then
+local validengines = allocate {
+ ["luatex"] = true,
+ ["luajittex"] = true,
+ -- ["luatex.exe"] = true,
+ -- ["luajittex.exe"] = true,
+}
+
+local basicengines = allocate {
+ ["luatex"] = "luatex",
+ ["texlua"] = "luatex",
+ ["texluac"] = "luatex",
+ ["luajittex"] = "luajittex",
+ ["texluajit"] = "luajittex",
+ -- ["texlua.exe"] = "luatex",
+ -- ["texluajit.exe"] = "luajittex",
+}
+
+environment.validengines = validengines
+environment.basicengines = basicengines
+
+if arg and validengines[file.removesuffix(arg[0])] and arg[1] == "--luaonly" then
arg[-1] = arg[0]
arg[ 0] = arg[2]
for k=3,#arg do
@@ -8056,9 +8761,6 @@ end
-- environment
-environment = environment or { }
-local environment = environment
-
environment.arguments = allocate()
environment.files = allocate()
environment.sortedflags = nil
@@ -8114,7 +8816,7 @@ function environment.initializearguments(arg)
end
end
end
- environment.ownname = environment.ownname or arg[0] or 'unknown.lua'
+ environment.ownname = file.reslash(environment.ownname or arg[0] or 'unknown.lua')
end
function environment.setargument(name,value)
@@ -8195,6 +8897,22 @@ function environment.reconstructcommandline(arg,noquote)
end
end
+-- -- to be tested:
+--
+-- function environment.reconstructcommandline(arg,noquote)
+-- arg = arg or environment.originalarguments
+-- if noquote and #arg == 1 then
+-- return unquoted(resolvers.resolve(arg[1]))
+-- elseif #arg > 0 then
+-- local result = { }
+-- for i=1,#arg do
+-- result[#result+1] = format("%q",unquoted(resolvers.resolve(arg[i]))) -- always quote
+-- end
+-- return concat(result," ")
+-- else
+-- return ""
+-- end
+-- end
if arg then
@@ -8289,9 +9007,11 @@ function environment.loadluafile(filename, version)
local lucname, luaname, chunk
local basename = file.removesuffix(filename)
if basename == filename then
- lucname, luaname = basename .. ".luc", basename .. ".lua"
+ luaname = fiule.addsuffix(basename,luasuffixes.lua)
+ lucname = fiule.addsuffix(basename,luasuffixes.luc)
else
- lucname, luaname = nil, basename -- forced suffix
+ luaname = basename -- forced suffix
+ lucname = nil
end
-- when not overloaded by explicit suffix we look for a luc file first
local fullname = (lucname and environment.luafile(lucname)) or ""
@@ -8372,7 +9092,6 @@ xml = xml or { }
local xml = xml
-local utf = unicode.utf8
local concat, remove, insert = table.concat, table.remove, table.insert
local type, next, setmetatable, getmetatable, tonumber = type, next, setmetatable, getmetatable, tonumber
local format, lower, find, match, gsub = string.format, string.lower, string.find, string.match, string.gsub
@@ -9583,7 +10302,7 @@ if not modules then modules = { } end modules ['lxml-lpt'] = {
-- todo: B/C/[get first match]
local concat, remove, insert = table.concat, table.remove, table.insert
-local type, next, tonumber, tostring, setmetatable, loadstring = type, next, tonumber, tostring, setmetatable, loadstring
+local type, next, tonumber, tostring, setmetatable, load, select = type, next, tonumber, tostring, setmetatable, load, select
local format, upper, lower, gmatch, gsub, find, rep = string.format, string.upper, string.lower, string.gmatch, string.gsub, string.find, string.rep
local lpegmatch, lpegpatterns = lpeg.match, lpeg.patterns
@@ -10195,7 +10914,7 @@ end
local function register_expression(expression)
local converted = lpegmatch(converter,expression)
- local runner = loadstring(format(template_e,converted))
+ local runner = load(format(template_e,converted))
runner = (runner and runner()) or function() errorrunner_e(expression,converted) end
return { kind = "expression", expression = expression, converted = converted, evaluator = runner }
end
@@ -10203,9 +10922,9 @@ end
local function register_finalizer(protocol,name,arguments)
local runner
if arguments and arguments ~= "" then
- runner = loadstring(format(template_f_y,protocol or xml.defaultprotocol,name,arguments))
+ runner = load(format(template_f_y,protocol or xml.defaultprotocol,name,arguments))
else
- runner = loadstring(format(template_f_n,protocol or xml.defaultprotocol,name))
+ runner = load(format(template_f_n,protocol or xml.defaultprotocol,name))
end
runner = (runner and runner()) or function() errorrunner_f(name,arguments) end
return { kind = "finalizer", name = name, arguments = arguments, finalizer = runner }
@@ -10597,6 +11316,7 @@ end
expressions.child = function(e,pattern)
return applylpath(e,pattern) -- todo: cache
end
+
expressions.count = function(e,pattern) -- what if pattern == empty or nil
local collected = applylpath(e,pattern) -- todo: cache
return pattern and (collected and #collected) or 0
@@ -10604,13 +11324,30 @@ end
-- external
-expressions.oneof = function(s,...) -- slow
- local t = {...} for i=1,#t do if s == t[i] then return true end end return false
+-- expressions.oneof = function(s,...)
+-- local t = {...}
+-- for i=1,#t do
+-- if s == t[i] then
+-- return true
+-- end
+-- end
+-- return false
+-- end
+
+expressions.oneof = function(s,...)
+ for i=1,select("#",...) do
+ if s == select(i,...) then
+ return true
+ end
+ end
+ return false
end
+
expressions.error = function(str)
xml.errorhandler(format("unknown function in lpath expression: %s",tostring(str or "?")))
return false
end
+
expressions.undefined = function(s)
return s == nil
end
@@ -12220,7 +12957,6 @@ if not modules then modules = { } end modules ['data-ini'] = {
}
local gsub, find, gmatch, char = string.gsub, string.find, string.gmatch, string.char
-local concat = table.concat
local next, type = next, type
local filedirname, filebasename, filejoin = file.dirname, file.basename, file.join
@@ -12311,6 +13047,10 @@ do
local args = environment.originalarguments or arg -- this needs a cleanup
+ if not environment.ownmain then
+ environment.ownmain = status and string.match(string.lower(status.banner),"this is ([%a]+)") or "luatex"
+ end
+
local ownbin = environment.ownbin or args[-2] or arg[-2] or args[-1] or arg[-1] or arg[0] or "luatex"
local ownpath = environment.ownpath or os.selfdir
@@ -12427,19 +13167,6 @@ end
environment.texroot = file.collapsepath(texroot)
--- Tracing. Todo ...
-
-function resolvers.settrace(n) -- no longer number but: 'locating' or 'detail'
- if n then
- trackers.disable("resolvers.*")
- trackers.enable("resolvers."..n)
- end
-end
-
-resolvers.settrace(osgetenv("MTX_INPUT_TRACE"))
-
--- todo:
-
if profiler then
directives.register("system.profile",function()
profiler.start("luatex-profile.log")
@@ -12946,6 +13673,8 @@ resolvers.suffixes = suffixes
resolvers.dangerous = dangerous
resolvers.suffixmap = suffixmap
+local luasuffixes = utilities.lua.suffixes
+
local relations = allocate { -- todo: handlers also here
core = {
ofm = { -- will become obsolete
@@ -13031,7 +13760,7 @@ local relations = allocate { -- todo: handlers also here
lua = {
names = { "lua" },
variable = 'LUAINPUTS',
- suffixes = { 'lua', 'luc', 'tma', 'tmc' },
+ suffixes = { luasuffixes.lua, luasuffixes.luc, luasuffixes.tma, luasuffixes.tmc },
},
lib = {
names = { "lib" },
@@ -13227,6 +13956,7 @@ if not modules then modules = { } end modules ['data-tmp'] = {
local format, lower, gsub, concat = string.format, string.lower, string.gsub, table.concat
local serialize, serializetofile = table.serialize, table.tofile
local mkdirs, isdir = dir.mkdirs, lfs.isdir
+local addsuffix, is_writable, is_readable = file.addsuffix, file.is_writable, file.is_readable
local trace_locating = false trackers.register("resolvers.locating", function(v) trace_locating = v end)
local trace_cache = false trackers.register("resolvers.cache", function(v) trace_cache = v end)
@@ -13251,8 +13981,10 @@ end
-- end of intermezzo
-caches = caches or { }
-local caches = caches
+caches = caches or { }
+local caches = caches
+
+local luasuffixes = utilities.lua.suffixes
caches.base = caches.base or "luatex-cache"
caches.more = caches.more or "context"
@@ -13280,18 +14012,18 @@ local function identify()
cachepath = file.collapsepath(cachepath)
local valid = isdir(cachepath)
if valid then
- if file.is_readable(cachepath) then
+ if is_readable(cachepath) then
readables[#readables+1] = cachepath
- if not writable and file.is_writable(cachepath) then
+ if not writable and is_writable(cachepath) then
writable = cachepath
end
end
elseif not writable and caches.force then
local cacheparent = file.dirname(cachepath)
- if file.is_writable(cacheparent) and true then -- we go on anyway (needed for mojca's kind of paths)
+ if is_writable(cacheparent) and true then -- we go on anyway (needed for mojca's kind of paths)
if not caches.ask or io.ask(format("\nShould I create the cache path %s?",cachepath), "no", { "yes", "no" }) == "yes" then
mkdirs(cachepath)
- if isdir(cachepath) and file.is_writable(cachepath) then
+ if isdir(cachepath) and is_writable(cachepath) then
report_caches("created: %s",cachepath)
writable = cachepath
readables[#readables+1] = cachepath
@@ -13313,8 +14045,8 @@ local function identify()
cachepath = resolvers.resolve(cachepath)
cachepath = resolvers.cleanpath(cachepath)
local valid = isdir(cachepath)
- if valid and file.is_readable(cachepath) then
- if not writable and file.is_writable(cachepath) then
+ if valid and is_readable(cachepath) then
+ if not writable and is_writable(cachepath) then
readables[#readables+1] = cachepath
writable = cachepath
break
@@ -13403,7 +14135,7 @@ end
local r_cache, w_cache = { }, { } -- normally w in in r but who cares
-local function getreadablepaths(...) -- we can optimize this as we have at most 2 tags
+local function getreadablepaths(...)
local tags = { ... }
local hash = concat(tags,"/")
local done = r_cache[hash]
@@ -13446,7 +14178,7 @@ function caches.getfirstreadablefile(filename,...)
for i=1,#rd do
local path = rd[i]
local fullname = file.join(path,filename)
- if file.is_readable(fullname) then
+ if is_readable(fullname) then
usedreadables[i] = true
return fullname, path
end
@@ -13467,7 +14199,7 @@ function caches.define(category,subcategory) -- for old times sake
end
function caches.setluanames(path,name)
- return path .. "/" .. name .. ".tma", path .. "/" .. name .. ".tmc"
+ return format("%s/%s.%s",path,name,luasuffixes.tma), format("%s/%s.%s",path,name,luasuffixes.tmc)
end
function caches.loaddata(readables,name)
@@ -13477,7 +14209,13 @@ function caches.loaddata(readables,name)
for i=1,#readables do
local path = readables[i]
local tmaname, tmcname = caches.setluanames(path,name)
- local loader = loadfile(tmcname) or loadfile(tmaname)
+ local loader = loadfile(tmcname)
+ if not loader then
+ -- in case we have a different engine
+ utilities.lua.compile(tmaname,tmcname)
+ --
+ loader = loadfile(tmaname)
+ end
if loader then
loader = loader()
collectgarbage("step")
@@ -13489,11 +14227,15 @@ end
function caches.is_writable(filepath,filename)
local tmaname, tmcname = caches.setluanames(filepath,filename)
- return file.is_writable(tmaname)
+ return is_writable(tmaname)
end
local saveoptions = { compact = true }
+-- add some point we will only use the internal bytecode compiler and
+-- then we can flag success in the tma so that it can trigger a compile
+-- if the other engine
+
function caches.savedata(filepath,filename,data,raw)
local tmaname, tmcname = caches.setluanames(filepath,filename)
local reduce, simplify = true, true
@@ -13519,9 +14261,9 @@ end
function caches.loadcontent(cachename,dataname)
local name = caches.hashed(cachename)
- local full, path = caches.getfirstreadablefile(name ..".lua","trees")
+ local full, path = caches.getfirstreadablefile(addsuffix(name,luasuffixes.lua),"trees")
local filename = file.join(path,name)
- local blob = loadfile(filename .. ".luc") or loadfile(filename .. ".lua")
+ local blob = loadfile(addsuffix(filename,luasuffixes.luc)) or loadfile(addsuffix(filename,luasuffixes.lua))
if blob then
local data = blob()
if data and data.content then
@@ -13556,9 +14298,10 @@ end
function caches.savecontent(cachename,dataname,content)
local name = caches.hashed(cachename)
- local full, path = caches.setfirstwritablefile(name ..".lua","trees")
+ local full, path = caches.setfirstwritablefile(addsuffix(name,luasuffixes.lua),"trees")
local filename = file.join(path,name) -- is full
- local luaname, lucname = filename .. ".lua", filename .. ".luc"
+ local luaname = addsuffix(filename,luasuffixes.lua)
+ local lucname = addsuffix(filename,luasuffixes.luc)
if trace_locating then
report_resolvers("preparing '%s' for '%s'",dataname,cachename)
end
@@ -13763,6 +14506,7 @@ local joinpath = file.joinpath
local allocate = utilities.storage.allocate
local settings_to_array = utilities.parsers.settings_to_array
local setmetatableindex = table.setmetatableindex
+local luasuffixes = utilities.lua.suffixes
local trace_locating = false trackers.register("resolvers.locating", function(v) trace_locating = v end)
local trace_detail = false trackers.register("resolvers.details", function(v) trace_detail = v end)
@@ -15325,15 +16069,19 @@ function resolvers.dowithvariable(name,func)
end
function resolvers.locateformat(name)
- local barename = file.removesuffix(name) -- gsub(name,"%.%a+$","")
- local fmtname = caches.getfirstreadablefile(barename..".fmt","formats") or ""
+ local engine = environment.ownmain or "luatex"
+ local barename = file.removesuffix(name)
+ local fullname = file.addsuffix(barename,"fmt")
+ local fmtname = caches.getfirstreadablefile(fullname,"formats",engine) or ""
if fmtname == "" then
- fmtname = resolvers.findfile(barename..".fmt")
+ fmtname = resolvers.findfile(fullname)
fmtname = resolvers.cleanpath(fmtname)
end
if fmtname ~= "" then
local barename = file.removesuffix(fmtname)
- local luaname, lucname, luiname = barename .. ".lua", barename .. ".luc", barename .. ".lui"
+ local luaname = file.addsuffix(barename,luasuffixes.lua)
+ local lucname = file.addsuffix(barename,luasuffixes.luc)
+ local luiname = file.addsuffix(barename,luasuffixes.lui)
if lfs.isfile(luiname) then
return barename, luiname
elseif lfs.isfile(lucname) then
@@ -15430,10 +16178,9 @@ local resolvers = resolvers
local prefixes = utilities.storage.allocate()
resolvers.prefixes = prefixes
-local gsub = string.gsub
local cleanpath, findgivenfile, expansion = resolvers.cleanpath, resolvers.findgivenfile, resolvers.expansion
local getenv = resolvers.getenv -- we can probably also use resolvers.expansion
-local P, Cs, lpegmatch = lpeg.P, lpeg.Cs, lpeg.match
+local P, S, R, C, Cs, lpegmatch = lpeg.P, lpeg.S, lpeg.R, lpeg.C, lpeg.Cs, lpeg.match
local joinpath, basename, dirname = file.join, file.basename, file.dirname
local getmetatable, rawset, type = getmetatable, rawset, type
@@ -15555,6 +16302,28 @@ end
-- todo: use an lpeg (see data-lua for !! / stripper)
+-- local function resolve(str) -- use schemes, this one is then for the commandline only
+-- if type(str) == "table" then
+-- local t = { }
+-- for i=1,#str do
+-- t[i] = resolve(str[i])
+-- end
+-- return t
+-- else
+-- local res = resolved[str]
+-- if not res then
+-- res = gsub(str,"([a-z][a-z]+):([^ \"\';,]*)",_resolve_) -- home:xx;selfautoparent:xx; etc (comma added)
+-- resolved[str] = res
+-- abstract[res] = str
+-- end
+-- return res
+-- end
+-- end
+
+-- home:xx;selfautoparent:xx;
+
+local pattern = Cs((C(R("az")^2) * P(":") * C((1-S(" \"\';,"))^1) / _resolve_ + P(1))^0)
+
local function resolve(str) -- use schemes, this one is then for the commandline only
if type(str) == "table" then
local t = { }
@@ -15565,7 +16334,7 @@ local function resolve(str) -- use schemes, this one is then for the commandline
else
local res = resolved[str]
if not res then
- res = gsub(str,"([a-z][a-z]+):([^ \"\';,]*)",_resolve_) -- home:xx;selfautoparent:xx; etc (comma added)
+ res = lpegmatch(pattern,str)
resolved[str] = res
abstract[res] = str
end
@@ -15998,7 +16767,7 @@ statistics.register("used cache path", function() return caches.usedpaths() end
function statistics.savefmtstatus(texname,formatbanner,sourcefile) -- texname == formatname
local enginebanner = status.list().banner
if formatbanner and enginebanner and sourcefile then
- local luvname = file.replacesuffix(texname,"luv")
+ local luvname = file.replacesuffix(texname,"luv") -- utilities.lua.suffixes.luv
local luvdata = {
enginebanner = enginebanner,
formatbanner = formatbanner,
@@ -16009,10 +16778,14 @@ function statistics.savefmtstatus(texname,formatbanner,sourcefile) -- texname ==
end
end
+-- todo: check this at startup and return (say) 999 as signal that the run
+-- was aborted due to a wrong format in which case mtx-context can trigger
+-- a remake
+
function statistics.checkfmtstatus(texname)
local enginebanner = status.list().banner
if enginebanner and texname then
- local luvname = file.replacesuffix(texname,"luv")
+ local luvname = file.replacesuffix(texname,"luv") -- utilities.lua.suffixes.luv
if lfs.isfile(luvname) then
local luv = dofile(luvname)
if luv and luv.sourcefile then
@@ -16389,7 +17162,7 @@ if not modules then modules = { } end modules ['data-sch'] = {
license = "see context related readme files"
}
-local loadstring = loadstring
+local load = load
local gsub, concat, format = string.gsub, table.concat, string.format
local finders, openers, loaders = resolvers.finders, resolvers.openers, resolvers.loaders
@@ -16575,7 +17348,7 @@ schemes.fetchstring = fetchstring
function schemes.fetchtable(url,data)
local reply = fetchstring(url,data)
if reply then
- local s = loadstring("return " .. reply)
+ local s = load("return " .. reply)
if s then
return s()
end
@@ -16603,6 +17376,8 @@ if not modules then modules = { } end modules ['data-lua'] = {
-- -- local mylib = require("libtest")
-- -- local mysql = require("luasql.mysql")
+local searchers = package.searchers or package.loaders
+
local concat = table.concat
local trace_libraries = false
@@ -16704,13 +17479,9 @@ function package.extraclibpath(...)
end
end
-if not package.loaders then
- package.loaders = package.searchers -- 5.2
-end
-
-if not package.loaders[-2] then
+if not searchers[-2] then
-- use package-path and package-cpath
- package.loaders[-2] = package.loaders[2]
+ searchers[-2] = searchers[2]
end
local function loadedaslib(resolved,rawname)
@@ -16721,7 +17492,7 @@ local function loadedbylua(name)
if trace_libraries then
report_libraries("! locating %q using normal loader",name)
end
- local resolved = package.loaders[-2](name)
+ local resolved = searchers[-2](name)
end
local function loadedbyformat(name,rawname,suffixes,islib)
@@ -16776,7 +17547,7 @@ local function notloaded(name)
end
end
-package.loaders[2] = function(name)
+searchers[2] = function(name)
local thename = gsub(name,"%.","/")
local luaname = file.addsuffix(thename,"lua")
local libname = file.addsuffix(thename,os.libsuffix)
@@ -16790,8 +17561,8 @@ package.loaders[2] = function(name)
or notloaded (name)
end
--- package.loaders[3] = nil
--- package.loaders[4] = nil
+-- searchers[3] = nil
+-- searchers[4] = nil
resolvers.loadlualib = require
@@ -17161,13 +17932,11 @@ if not modules then modules = { } end modules ['luat-fmt'] = {
local format = string.format
+local quoted = string.quoted
+local luasuffixes = utilities.lua.suffixes
local report_format = logs.reporter("resolvers","formats")
--- helper for mtxrun
-
-local quoted = string.quoted
-
local function primaryflags() -- not yet ok
local trackers = environment.argument("trackers")
local directives = environment.argument("directives")
@@ -17182,13 +17951,14 @@ local function primaryflags() -- not yet ok
end
function environment.make_format(name)
+ local engine = environment.ownmain or "luatex"
-- change to format path (early as we need expanded paths)
- local olddir = lfs.currentdir()
- local path = caches.getwritablepath("formats") or "" -- maybe platform
+ local olddir = dir.current()
+ local path = caches.getwritablepath("formats",engine) or "" -- maybe platform
if path ~= "" then
lfs.chdir(path)
end
- report_format("format path: %s",lfs.currentdir())
+ report_format("format path: %s",dir.current())
-- check source file
local texsourcename = file.addsuffix(name,"mkiv")
local fulltexsourcename = resolvers.findfile(texsourcename,"tex") or ""
@@ -17225,13 +17995,12 @@ function environment.make_format(name)
elseif type(usedlualibs) == "table" then
report_format("using stub specification: %s",fullspecificationname)
local texbasename = file.basename(name)
- local luastubname = file.addsuffix(texbasename,"lua")
- local lucstubname = file.addsuffix(texbasename,"luc")
+ local luastubname = file.addsuffix(texbasename,luasuffixes.lua)
+ local lucstubname = file.addsuffix(texbasename,luasuffixes.luc)
-- pack libraries in stub
report_format("creating initialization file: %s",luastubname)
utilities.merger.selfcreate(usedlualibs,specificationpath,luastubname)
-- compile stub file (does not save that much as we don't use this stub at startup any more)
- local strip = resolvers.booleanvariable("LUACSTRIP", true)
if utilities.lua.compile(luastubname,lucstubname) and lfs.isfile(lucstubname) then
report_format("using compiled initialization file: %s",lucstubname)
usedluastub = lucstubname
@@ -17245,7 +18014,7 @@ function environment.make_format(name)
return
end
-- generate format
- local command = format("luatex --ini %s --lua=%s %s %sdump",primaryflags(),quoted(usedluastub),quoted(fulltexsourcename),os.platform == "unix" and "\\\\" or "\\")
+ local command = format("%s --ini %s --lua=%s %s %sdump",engine,primaryflags(),quoted(usedluastub),quoted(fulltexsourcename),os.platform == "unix" and "\\\\" or "\\")
report_format("running command: %s\n",command)
os.spawn(command)
-- remove related mem files
@@ -17264,8 +18033,9 @@ end
function environment.run_format(name,data,more)
if name and name ~= "" then
+ local engine = environment.ownmain or "luatex"
local barename = file.removesuffix(name)
- local fmtname = caches.getfirstreadablefile(file.addsuffix(barename,"fmt"),"formats")
+ local fmtname = caches.getfirstreadablefile(file.addsuffix(barename,"fmt"),"formats",engine)
if fmtname == "" then
fmtname = resolvers.findfile(file.addsuffix(barename,"fmt")) or ""
end
@@ -17282,7 +18052,7 @@ function environment.run_format(name,data,more)
report_format("using format name: %s",fmtname)
report_format("no luc/lua with name: %s",barename)
else
- local command = format("luatex %s --fmt=%s --lua=%s %s %s",primaryflags(),quoted(barename),quoted(luaname),quoted(data),more ~= "" and quoted(more) or "")
+ local command = format("%s %s --fmt=%s --lua=%s %s %s",engine,primaryflags(),quoted(barename),quoted(luaname),quoted(data),more ~= "" and quoted(more) or "")
report_format("running command: %s",command)
os.spawn(command)
end
@@ -17423,10 +18193,11 @@ own = { } -- not local, might change
own.libs = { -- order can be made better
+ 'l-lua.lua',
+ 'l-lpeg.lua',
'l-function.lua',
'l-string.lua',
'l-table.lua',
- 'l-lpeg.lua',
'l-io.lua',
'l-number.lua',
'l-set.lua',
@@ -17656,6 +18427,10 @@ local helpinfo = [[
--variables show configuration variables
--configurations show configuration order
+--directives show (known) directives
+--trackers show (known) trackers
+--experiments show (known) experiments
+
--expand-braces expand complex variable
--expand-path expand variable (resolve paths)
--expand-var expand variable (resolve references)
@@ -18346,8 +19121,18 @@ else
end
+if e_argument("script") or e_argument("scripts") then
+
+ -- run a script by loading it (using libs), pass args
-if e_argument("selfmerge") then
+ runners.loadbase()
+ if is_mkii_stub then
+ ok = runners.execute_script(filename,false,true)
+ else
+ ok = runners.execute_ctx_script(filename)
+ end
+
+elseif e_argument("selfmerge") then
-- embed used libraries
@@ -18377,17 +19162,6 @@ elseif e_argument("ctxlua") or e_argument("internal") then
runners.loadbase()
ok = runners.execute_script(filename,true)
-elseif e_argument("script") or e_argument("scripts") then
-
- -- run a script by loading it (using libs), pass args
-
- runners.loadbase()
- if is_mkii_stub then
- ok = runners.execute_script(filename,false,true)
- else
- ok = runners.execute_ctx_script(filename)
- end
-
elseif e_argument("execute") then
-- execute script
@@ -18615,6 +19389,18 @@ elseif e_argument("version") then
application.version()
+elseif e_argument("directives") then
+
+ directives.show()
+
+elseif e_argument("trackers") then
+
+ trackers.show()
+
+elseif e_argument("experiments") then
+
+ experiments.show()
+
elseif e_argument("help") or filename=='help' or filename == "" then
application.help()