From 5f48570bba149ac17f45c80d5ee95306aa69d0c9 Mon Sep 17 00:00:00 2001 From: Hans Hagen Date: Mon, 24 Dec 2012 20:08:00 +0100 Subject: beta 2012.12.24 20:08 --- tex/context/base/anch-pos.lua | 2 +- tex/context/base/attr-lay.lua | 2 +- tex/context/base/back-exp.lua | 3 +- tex/context/base/bibl-bib.lua | 1 - tex/context/base/buff-ini.lua | 90 +- tex/context/base/char-ini.lua | 86 +- tex/context/base/char-tex.lua | 13 +- tex/context/base/char-utf.lua | 23 +- tex/context/base/chem-ini.lua | 58 +- tex/context/base/chem-str.lua | 448 ++- tex/context/base/chem-str.mkiv | 83 +- tex/context/base/cldf-bas.mkiv | 1 + tex/context/base/cldf-com.lua | 5 +- tex/context/base/cldf-ini.lua | 30 +- tex/context/base/cldf-prs.lua | 52 + tex/context/base/colo-ini.lua | 11 +- tex/context/base/cont-new.mkii | 2 +- tex/context/base/cont-new.mkiv | 2 +- tex/context/base/cont-yes.mkiv | 11 + tex/context/base/context-version.pdf | Bin 4089 -> 4112 bytes tex/context/base/context-version.png | Bin 40551 -> 40425 bytes tex/context/base/context.mkii | 2 +- tex/context/base/context.mkiv | 2 +- tex/context/base/core-con.lua | 2 - tex/context/base/core-uti.lua | 1 + tex/context/base/data-env.lua | 4 +- tex/context/base/data-ini.lua | 18 +- tex/context/base/data-lua.lua | 18 +- tex/context/base/data-pre.lua | 27 +- tex/context/base/data-res.lua | 13 +- tex/context/base/data-sch.lua | 4 +- tex/context/base/data-tex.lua | 10 +- tex/context/base/data-tmp.lua | 48 +- tex/context/base/data-use.lua | 8 +- tex/context/base/file-job.lua | 2 +- tex/context/base/font-con.lua | 3 - tex/context/base/font-def.lua | 1 - tex/context/base/font-ext.lua | 1 - tex/context/base/font-ota.lua | 2 +- tex/context/base/font-otf.lua | 26 +- tex/context/base/font-otn.lua | 2 + tex/context/base/font-otp.lua | 226 +- tex/context/base/font-syn.lua | 1 - tex/context/base/font-vf.lua | 7 +- tex/context/base/l-dir.lua | 26 +- tex/context/base/l-file.lua | 133 +- tex/context/base/l-io.lua | 2 +- tex/context/base/l-lpeg.lua | 323 +- 
tex/context/base/l-lua.lua | 107 + tex/context/base/l-number.lua | 238 +- tex/context/base/l-string.lua | 194 +- tex/context/base/l-table.lua | 238 +- tex/context/base/l-unicode.lua | 693 ++-- tex/context/base/lang-ini.lua | 1 - tex/context/base/lang-url.lua | 5 +- tex/context/base/lang-wrd.lua | 4 +- tex/context/base/lpdf-epa.lua | 141 +- tex/context/base/lpdf-epd.lua | 28 +- tex/context/base/lpdf-ini.lua | 3 +- tex/context/base/lpdf-swf.lua | 2 +- tex/context/base/luat-bas.mkiv | 3 +- tex/context/base/luat-cnf.lua | 77 +- tex/context/base/luat-cod.mkiv | 2 +- tex/context/base/luat-env.lua | 71 +- tex/context/base/luat-exe.lua | 15 +- tex/context/base/luat-fio.lua | 3 + tex/context/base/luat-fmt.lua | 25 +- tex/context/base/luat-ini.lua | 232 +- tex/context/base/luat-ini.mkiv | 100 +- tex/context/base/lxml-ent.lua | 3 +- tex/context/base/lxml-inf.lua | 2 + tex/context/base/lxml-lpt.lua | 30 +- tex/context/base/lxml-tab.lua | 1 - tex/context/base/lxml-tex.lua | 36 +- tex/context/base/m-database.lua | 16 +- tex/context/base/m-morse.mkvi | 23 +- tex/context/base/m-timing.mkiv | 10 +- tex/context/base/math-ini.lua | 3 +- tex/context/base/math-ini.mkiv | 12 + tex/context/base/math-noa.lua | 2 - tex/context/base/meta-fun.lua | 4 +- tex/context/base/meta-imp-dum.mkiv | 1 + tex/context/base/meta-ini.lua | 117 +- tex/context/base/meta-ini.mkiv | 13 +- tex/context/base/meta-pag.mkiv | 2 + tex/context/base/meta-pdf.lua | 1 + tex/context/base/mlib-pdf.lua | 4 +- tex/context/base/mlib-run.lua | 2 +- tex/context/base/mtx-context-copy.tex | 151 + tex/context/base/mtx-context-select.tex | 3 +- tex/context/base/mtx-context-timing.tex | 6 +- tex/context/base/mult-de.mkii | 1 + tex/context/base/mult-def.lua | 4 + tex/context/base/mult-en.mkii | 1 + tex/context/base/mult-fr.mkii | 1 + tex/context/base/mult-it.mkii | 1 + tex/context/base/mult-low.lua | 22 +- tex/context/base/mult-nl.mkii | 1 + tex/context/base/mult-pe.mkii | 1 + tex/context/base/mult-ro.mkii | 1 + 
tex/context/base/node-aux.lua | 2 +- tex/context/base/node-fnt.lua | 161 + tex/context/base/node-ini.lua | 1 - tex/context/base/node-inj.lua | 2 +- tex/context/base/node-pro.lua | 1 - tex/context/base/node-typ.lua | 2 +- tex/context/base/phys-dim.mkiv | 4 + tex/context/base/s-abr-01.tex | 1 + tex/context/base/s-abr-04.tex | 1 + tex/context/base/s-inf-03.mkiv | 33 +- tex/context/base/s-mod-01.mkiv | 3 + tex/context/base/sort-ini.lua | 7 +- tex/context/base/spac-ver.mkiv | 16 +- tex/context/base/status-files.pdf | Bin 24378 -> 24410 bytes tex/context/base/status-lua.pdf | Bin 199955 -> 203150 bytes tex/context/base/status-mkiv.tex | 9 + tex/context/base/strc-bkm.lua | 2 +- tex/context/base/strc-doc.lua | 7 +- tex/context/base/strc-ini.lua | 10 +- tex/context/base/strc-ref.lua | 2 +- tex/context/base/strc-ref.mkvi | 4 +- tex/context/base/syst-aux.lua | 1 - tex/context/base/syst-con.lua | 2 +- tex/context/base/syst-lua.mkiv | 12 +- tex/context/base/toks-ini.lua | 5 +- tex/context/base/trac-log.lua | 14 +- tex/context/base/trac-set.lua | 35 +- tex/context/base/trac-tim.lua | 6 +- tex/context/base/typo-dir.lua | 2 - tex/context/base/typo-krn.lua | 2 - tex/context/base/typo-spa.lua | 2 - tex/context/base/util-lua.lua | 161 +- tex/context/base/util-pck.lua | 29 +- tex/context/base/util-prs.lua | 40 +- tex/context/base/util-seq.lua | 4 +- tex/context/base/util-sql-users.lua | 4 +- tex/context/base/util-sql.lua | 4 +- tex/context/base/util-sta.lua | 6 +- tex/context/base/util-sto.lua | 66 +- tex/context/base/util-tab.lua | 28 +- tex/context/base/x-mathml.lua | 4 +- tex/context/interface/keys-cs.xml | 1 + tex/context/interface/keys-de.xml | 1 + tex/context/interface/keys-en.xml | 1 + tex/context/interface/keys-fr.xml | 1 + tex/context/interface/keys-it.xml | 1 + tex/context/interface/keys-nl.xml | 1 + tex/context/interface/keys-pe.xml | 1 + tex/context/interface/keys-ro.xml | 1 + tex/generic/context/luatex/luatex-fonts-merged.lua | 3596 ++++++++++---------- 
tex/generic/context/luatex/luatex-fonts.lua | 4 +- 151 files changed, 5173 insertions(+), 3622 deletions(-) create mode 100644 tex/context/base/cldf-prs.lua create mode 100644 tex/context/base/l-lua.lua create mode 100644 tex/context/base/mtx-context-copy.tex (limited to 'tex') diff --git a/tex/context/base/anch-pos.lua b/tex/context/base/anch-pos.lua index 6fa916291..eda0ba37a 100644 --- a/tex/context/base/anch-pos.lua +++ b/tex/context/base/anch-pos.lua @@ -20,7 +20,7 @@ more efficient.

local commands, context = commands, context local tostring, next, rawget, setmetatable = tostring, next, rawget, setmetatable -local concat, sort = table.concat, table.sort +local sort = table.sort local format, gmatch, match = string.format, string.gmatch, string.match local rawget = rawget local lpegmatch = lpeg.match diff --git a/tex/context/base/attr-lay.lua b/tex/context/base/attr-lay.lua index 5e5e81ff1..059353116 100644 --- a/tex/context/base/attr-lay.lua +++ b/tex/context/base/attr-lay.lua @@ -14,7 +14,7 @@ if not modules then modules = { } end modules ['attr-lay'] = { local type = type local format = string.format -local insert, remove, concat = table.insert, table.remove, table.concat +local insert, remove = table.insert, table.remove local attributes, nodes, utilities, logs, backends = attributes, nodes, utilities, logs, backends local commands, context, interfaces = commands, context, interfaces diff --git a/tex/context/base/back-exp.lua b/tex/context/base/back-exp.lua index 56d300ba3..37af87505 100644 --- a/tex/context/base/back-exp.lua +++ b/tex/context/base/back-exp.lua @@ -22,10 +22,9 @@ local next, type = next, type local format, match, concat, rep, sub, gsub, gmatch, find = string.format, string.match, table.concat, string.rep, string.sub, string.gsub, string.gmatch, string.find local validstring = string.valid local lpegmatch = lpeg.match -local utfchar, utfbyte = utf.char, utf.byte +local utfchar, utfbyte, utfvalues = utf.char, utf.byte, utf.values local insert, remove = table.insert, table.remove local topoints = number.topoints -local utfvalues = string.utfvalues local fromunicode16 = fonts.mappings.fromunicode16 local sortedhash = table.sortedhash diff --git a/tex/context/base/bibl-bib.lua b/tex/context/base/bibl-bib.lua index 444f7e9bc..a995d7429 100644 --- a/tex/context/base/bibl-bib.lua +++ b/tex/context/base/bibl-bib.lua @@ -12,7 +12,6 @@ bibtex files and converts them to xml so that the we access the content in a convenient way. 
Actually handling the data takes place elsewhere.

--ldx]]-- -local utf = unicode.utf8 local lower, format, gsub, concat = string.lower, string.format, string.gsub, table.concat local next = next local utfchar = utf.char diff --git a/tex/context/base/buff-ini.lua b/tex/context/base/buff-ini.lua index 11d7cc9f6..3aa361297 100644 --- a/tex/context/base/buff-ini.lua +++ b/tex/context/base/buff-ini.lua @@ -16,9 +16,10 @@ local report_grabbing = logs.reporter("buffers","grabbing") local context, commands = context, commands local concat = table.concat -local type, next = type, next -local sub, format, match, find = string.sub, string.format, string.match, string.find -local count, splitlines, validstring = string.count, string.splitlines, string.valid +local type, next, load = type, next, load +local sub, format = string.sub, string.format +local splitlines, validstring = string.splitlines, string.valid +local P, Cs, patterns, lpegmatch = lpeg.P, lpeg.Cs, lpeg.patterns, lpeg.match local variables = interfaces.variables local settings_to_array = utilities.parsers.settings_to_array @@ -100,8 +101,6 @@ buffers.collectcontent = collectcontent commands.erasebuffer = erase commands.assignbuffer = assign -local P, patterns, lpegmatch = lpeg.P, lpeg.patterns, lpeg.match - local anything = patterns.anything local alwaysmatched = patterns.alwaysmatched @@ -128,6 +127,65 @@ local continue = false -- An \n is unlikely to show up as \r is the endlinechar but \n is more generic -- for us. +-- This fits the way we fetch verbatim: the indentatio before the sentinel +-- determines the stripping. 
+ +-- str = [[ +-- test test test test test test test +-- test test test test test test test +-- test test test test test test test +-- +-- test test test test test test test +-- test test test test test test test +-- test test test test test test test +-- ]] + +-- local function undent(str) +-- local margin = match(str,"[\n\r]( +)[\n\r]*$") or "" +-- local indent = #margin +-- if indent > 0 then +-- local lines = splitlines(str) +-- local ok = true +-- local pattern = "^" .. margin +-- for i=1,#lines do +-- local l = lines[i] +-- if find(l,pattern) then +-- lines[i] = sub(l,indent+1) +-- else +-- ok = false +-- break +-- end +-- end +-- if ok then +-- return concat(lines,"\n") +-- end +-- end +-- return str +-- end + +local getmargin = (Cs(P(" ")^1)*P(-1)+1)^1 +local eol = patterns.eol +local whatever = (P(1)-eol)^0 * eol^1 + +local strippers = { } + +local function undent(str) -- new version, needs testing + local margin = lpegmatch(getmargin,str) + if type(margin) ~= "string" then + return str + end + local indent = #margin + if indent == 0 then + return str + end + local stripper = strippers[indent] + if not stripper then + stripper = Cs((P(margin)/"" * whatever + eol^1)^1) + strippers[indent] = stripper + end + return lpegmatch(stripper,str) or str +end + function commands.grabbuffer(name,begintag,endtag,bufferdata,catcodes) -- maybe move \\ to call local dn = getcontent(name) if dn == "" then @@ -165,25 +223,7 @@ function commands.grabbuffer(name,begintag,endtag,bufferdata,catcodes) -- maybe dn = sub(dn,1,-2) end if autoundent then - local margin = match(dn,"[\n\r]( +)[\n\r]*$") or "" - local indent = #margin - if indent > 0 then - local lines = splitlines(dn) - local ok = true - local pattern = "^" .. 
margin - for i=1,#lines do - local l = lines[i] - if find(l,pattern) then - lines[i] = sub(l,indent+1) - else - ok = false - break - end - end - if ok then - dn = concat(lines,"\n") - end - end + dn = undent(dn) end end assign(name,dn,catcodes) @@ -259,7 +299,7 @@ function commands.gettexbuffer(name) end function commands.getbufferctxlua(name) - local ok = loadstring(getcontent(name)) + local ok = load(getcontent(name)) if ok then ok() else diff --git a/tex/context/base/char-ini.lua b/tex/context/base/char-ini.lua index e0480df65..f35d7d2a9 100644 --- a/tex/context/base/char-ini.lua +++ b/tex/context/base/char-ini.lua @@ -12,12 +12,14 @@ if not modules then modules = { } end modules ['char-ini'] = { local tex = tex -local utfchar, utfbyte, utfvalues = utf.char, utf.byte, string.utfvalues -local ustring, utf = unicode.ustring, unicode.utf8 +local utfchar, utfbyte, utfvalues, ustring = utf.char, utf.byte, utf.values, utf.ustring local concat, unpack, tohash = table.concat, table.unpack, table.tohash local next, tonumber, type, rawget, rawset = next, tonumber, type, rawget, rawset local format, lower, gsub, match, gmatch = string.format, string.lower, string.gsub, string.match, string.match, string.gmatch -local P, R, lpegmatch = lpeg.P, lpeg.R, lpeg.match +local P, R, Cs, lpegmatch, patterns = lpeg.P, lpeg.R, lpeg.Cs, lpeg.match, lpeg.patterns + +local utf8byte = patterns.utf8byte +local utf8char = patterns.utf8char local allocate = utilities.storage.allocate local mark = utilities.storage.mark @@ -62,7 +64,7 @@ end local pattern = (P("0x") + P("U+")) * ((R("09","AF")^1 * P(-1)) / function(s) return tonumber(s,16) end) -lpeg.patterns.chartonumber = pattern +patterns.chartonumber = pattern local function chartonumber(k) if type(k) == "string" then @@ -746,7 +748,7 @@ characters.activeoffset = 0x10000 -- there will be remapped in that byte range -- table.setmetatableindex(utfbytes,function(t,k) local v= utfchar(k) t[k] = v return v end) -- 
table.setmetatableindex(utfchars,function(t,k) local v= utfbyte(k) t[k] = v return v end) -local function utfstring(s) +local function toutfstring(s) if type(s) == "table" then return utfchar(unpack(s)) -- concat { utfchar( unpack(s) ) } else @@ -754,7 +756,7 @@ local function utfstring(s) end end -utf.string = utf.string or utfstring +utf.tostring = toutfstring local categories = allocate() characters.categories = categories -- lazy table @@ -775,10 +777,10 @@ local ucchars = allocate() characters.ucchars = ucchars -- lazy table local shchars = allocate() characters.shchars = shchars -- lazy table local fschars = allocate() characters.fschars = fschars -- lazy table -setmetatableindex(lcchars, function(t,u) if u then local c = data[u] c = c and c.lccode c = c and utfstring(c) or (type(u) == "number" and utfchar(u)) or u t[u] = c return c end end) -setmetatableindex(ucchars, function(t,u) if u then local c = data[u] c = c and c.uccode c = c and utfstring(c) or (type(u) == "number" and utfchar(u)) or u t[u] = c return c end end) -setmetatableindex(shchars, function(t,u) if u then local c = data[u] c = c and c.shcode c = c and utfstring(c) or (type(u) == "number" and utfchar(u)) or u t[u] = c return c end end) -setmetatableindex(fschars, function(t,u) if u then local c = data[u] c = c and c.fscode c = c and utfstring(c) or (type(u) == "number" and utfchar(u)) or u t[u] = c return c end end) +setmetatableindex(lcchars, function(t,u) if u then local c = data[u] c = c and c.lccode c = c and toutfstring(c) or (type(u) == "number" and utfchar(u)) or u t[u] = c return c end end) +setmetatableindex(ucchars, function(t,u) if u then local c = data[u] c = c and c.uccode c = c and toutfstring(c) or (type(u) == "number" and utfchar(u)) or u t[u] = c return c end end) +setmetatableindex(shchars, function(t,u) if u then local c = data[u] c = c and c.shcode c = c and toutfstring(c) or (type(u) == "number" and utfchar(u)) or u t[u] = c return c end end) +setmetatableindex(fschars, 
function(t,u) if u then local c = data[u] c = c and c.fscode c = c and toutfstring(c) or (type(u) == "number" and utfchar(u)) or u t[u] = c return c end end) local decomposed = allocate() characters.decomposed = decomposed -- lazy table local specials = allocate() characters.specials = specials -- lazy table @@ -857,32 +859,48 @@ function characters.unicodechar(asked) end end -function characters.lower(str) - local new, n = { }, 0 - for u in utfvalues(str) do - n = n + 1 - new[n] = lcchars[u] - end - return concat(new) -end +-- function characters.lower(str) +-- local new, n = { }, 0 +-- for u in utfvalues(str) do +-- n = n + 1 +-- new[n] = lcchars[u] +-- end +-- return concat(new) +-- end +-- +-- function characters.upper(str) +-- local new, n = { }, 0 +-- for u in utfvalues(str) do +-- n = n + 1 +-- new[n] = ucchars[u] +-- end +-- return concat(new) +-- end +-- +-- function characters.shaped(str) +-- local new, n = { }, 0 +-- for u in utfvalues(str) do +-- n = n + 1 +-- new[n] = shchars[u] +-- end +-- return concat(new) +-- end -function characters.upper(str) - local new, n = { }, 0 - for u in utfvalues(str) do - n = n + 1 - new[n] = ucchars[u] - end - return concat(new) -end +----- tolower = Cs((utf8byte/lcchars)^0) +----- toupper = Cs((utf8byte/ucchars)^0) +----- toshape = Cs((utf8byte/shchars)^0) -function characters.shaped(str) - local new, n = { }, 0 - for u in utfvalues(str) do - n = n + 1 - new[n] = shchars[u] - end - return concat(new) -end +local tolower = Cs((utf8char/lcchars)^0) +local toupper = Cs((utf8char/ucchars)^0) +local toshape = Cs((utf8char/shchars)^0) + +patterns.tolower = tolower +patterns.toupper = toupper +patterns.toshape = toshape + +function characters.lower (str) return lpegmatch(tolower,str) end +function characters.upper (str) return lpegmatch(toupper,str) end +function characters.shaped(str) return lpegmatch(toshape,str) end function characters.lettered(str,spacing) local new, n = { }, 0 diff --git a/tex/context/base/char-tex.lua 
b/tex/context/base/char-tex.lua index e6d6c41e0..91aa387b9 100644 --- a/tex/context/base/char-tex.lua +++ b/tex/context/base/char-tex.lua @@ -189,14 +189,13 @@ local convert_accents_strip = Cs((no_l * accents * no_r + accents + P(1))^0) local convert_commands_strip = Cs((no_l * commands * no_r + commands + P(1))^0) function characters.tex.toutf(str,strip) - if find(str,"\\") then -- we can start at the found position - if strip then - return lpegmatch(convert_accents_strip,lpegmatch(convert_commands_strip,str)) - else - return lpegmatch(convert_accents, lpegmatch(convert_commands, str)) - end + if not find(str,"\\") then -- we can start at the found position + return str + elseif strip then + return lpegmatch(convert_accents_strip,lpegmatch(convert_commands_strip,str)) + else + return lpegmatch(convert_accents, lpegmatch(convert_commands, str)) end - return str end --~ print(characters.tex.toutf([[\"{e}]]),true) diff --git a/tex/context/base/char-utf.lua b/tex/context/base/char-utf.lua index 52fdfc0d0..54ace8c9b 100644 --- a/tex/context/base/char-utf.lua +++ b/tex/context/base/char-utf.lua @@ -19,10 +19,10 @@ in special kinds of output (for instance ).

over a string.

--ldx]]-- -local utfchar, utfbyte = utf.char, utf.byte local concat, gmatch, gsub, find = table.concat, string.gmatch, string.gsub, string.find -local utfcharacters, utfvalues = string.utfcharacters, string.utfvalues +local utfchar, utfbyte, utfcharacters, utfvalues = utf.char, utf.byte, utf.characters, utf.values local allocate = utilities.storage.allocate +local lpegmatch, lpegpatterns = lpeg.match, lpeg.patterns local charfromnumber = characters.fromnumber @@ -194,6 +194,20 @@ local private = { utffilters.private = private +local tohigh = lpeg.replacer(low) -- frozen, only for basic tex +local tolow = lpeg.replacer(high) -- frozen, only for basic tex + +lpegpatterns.utftohigh = tohigh +lpegpatterns.utftolow = tolow + +function utffilters.harden(str) + return lpegmatch(tohigh,str) +end + +function utffilters.soften(str) + return lpegmatch(tolow,str) +end + local function set(ch) local cb if type(ch) == "number" then @@ -202,9 +216,12 @@ local function set(ch) cb = utfbyte(ch) end if cb < 256 then + escapes[ch] = "\\" .. ch low[ch] = utfchar(0x0F0000 + cb) + if ch == "%" then + ch = "%%" -- nasty, but we need this as in replacements (also in lpeg) % is interpreted + end high[utfchar(0x0F0000 + cb)] = ch - escapes[ch] = "\\" .. 
ch end end diff --git a/tex/context/base/chem-ini.lua b/tex/context/base/chem-ini.lua index 192953ec9..c862e00e3 100644 --- a/tex/context/base/chem-ini.lua +++ b/tex/context/base/chem-ini.lua @@ -7,73 +7,37 @@ if not modules then modules = { } end modules ['chem-ini'] = { } local format = string.format -local lpegmatch = lpeg.match - -local P, R, V, Cc, Cs = lpeg.P, lpeg.R, lpeg.V, lpeg.Cc, lpeg.Cs +local lpegmatch, patterns = lpeg.match, lpeg.patterns local trace_molecules = false trackers.register("chemistry.molecules", function(v) trace_molecules = v end) local report_chemistry = logs.reporter("chemistry") -local context = context +local context = context +local cpatterns = patterns.context chemistry = chemistry or { } local chemistry = chemistry --[[ -

The next code is an adaptation of code from Wolfgang Schuster -as posted on the mailing list. This version supports nested -braces and unbraced integers as scripts. We could consider -spaces as terminals for them but first let collect a bunch -of input then.

+

The next code started out as adaptation of code from Wolfgang Schuster as +posted on the mailing list. The current version supports nested braces and +unbraced integers as scripts.

]]-- --- some lpeg, maybe i'll make an syst-lpg module - -local lowercase = R("az") -local uppercase = R("AZ") -local backslash = P("\\") -local csname = backslash * P(1) * (1-backslash)^0 -local plus = P("+") / "\\textplus " -local minus = P("-") / "\\textminus " -local digit = R("09") -local sign = plus + minus -local cardinal = digit^1 -local integer = sign^0 * cardinal - -local leftbrace = P("{") -local rightbrace = P("}") -local nobrace = 1 - (leftbrace + rightbrace) -local nested = P { leftbrace * (csname + sign + nobrace + V(1))^0 * rightbrace } -local any = P(1) - -local subscript = P("_") -local superscript = P("^") -local somescript = subscript + superscript - -local content = Cs(csname + nested + sign + any) - --- could be made more efficient - -local lowhigh = Cc("\\lohi{%s}{%s}") * subscript * content * superscript * content / format -local highlow = Cc("\\hilo{%s}{%s}") * superscript * content * subscript * content / format -local low = Cc("\\low{%s}") * subscript * content / format -local high = Cc("\\high{%s}") * superscript * content / format -local justtext = (1 - somescript)^1 -local parser = Cs((csname + lowhigh + highlow + low + high + sign + any)^0) - -chemistry.moleculeparser = parser -- can be used to avoid functioncall +local moleculeparser = cpatterns.scripted +chemistry.moleculeparser = moleculeparser function chemistry.molecule(str) - return lpegmatch(parser,str) + return lpegmatch(moleculeparser,str) end function commands.molecule(str) if trace_molecules then - local rep = lpegmatch(parser,str) + local rep = lpegmatch(moleculeparser,str) report_chemistry("molecule %s => %s",str,rep) context(rep) else - context(lpegmatch(parser,str)) + context(lpegmatch(moleculeparser,str)) end end diff --git a/tex/context/base/chem-str.lua b/tex/context/base/chem-str.lua index dc4bd746f..3ab2e53b6 100644 --- a/tex/context/base/chem-str.lua +++ b/tex/context/base/chem-str.lua @@ -21,24 +21,40 @@ if not modules then modules = { } end modules ['chem-str'] 
= { -- the current user interface is slightly different from the old one but hopefully users -- will like the added value. -local trace_structure = false trackers.register("chemistry.structure", function(v) trace_structure = v end) -local trace_metapost = false trackers.register("chemistry.metapost", function(v) trace_metapost = v end) -local trace_textstack = false trackers.register("chemistry.textstack", function(v) trace_textstack = v end) +-- directive_strictorder: one might set this to off when associated texts are disordered too + +local trace_structure = false trackers .register("chemistry.structure", function(v) trace_structure = v end) +local trace_metapost = false trackers .register("chemistry.metapost", function(v) trace_metapost = v end) +local trace_textstack = false trackers .register("chemistry.textstack", function(v) trace_textstack = v end) +local directive_strictorder = true directives.register("chemistry.strictorder", function(v) directive_strictorder = v end) +local directive_strictindex = false directives.register("chemistry.strictindex", function(v) directive_strictindex = v end) local report_chemistry = logs.reporter("chemistry") local format, gmatch, match, lower, gsub = string.format, string.gmatch, string.match, string.lower, string.gsub -local concat, insert, remove = table.concat, table.insert, table.remove +local concat, insert, remove, unique, sorted = table.concat, table.insert, table.remove, table.unique, table.sorted local processor_tostring = typesetters and typesetters.processors.tostring local settings_to_array = utilities.parsers.settings_to_array local settings_to_array_with_repeat = utilities.parsers.settings_to_array_with_repeat local lpegmatch = lpeg.match -local P, R, S, C, Cs, Ct, Cc = lpeg.P, lpeg.R, lpeg.S, lpeg.C, lpeg.Cs, lpeg.Ct, lpeg.Cc +local P, R, S, C, Cs, Ct, Cc, Cmt = lpeg.P, lpeg.R, lpeg.S, lpeg.C, lpeg.Cs, lpeg.Ct, lpeg.Cc, lpeg.Cmt local variables = interfaces and interfaces.variables local context = context 
+local v_default = variables.default +local v_small = variables.small +local v_medium = variables.medium +local v_big = variables.big +local v_normal = variables.normal +local v_fit = variables.fit +local v_on = variables.on + +local mpnamedcolor = attributes.colors.mpnamedcolor +local topoints = number.topoints +local todimen = string.todimen + chemistry = chemistry or { } local chemistry = chemistry @@ -47,39 +63,44 @@ chemistry.format = "metafun" chemistry.structures = 0 local common_keys = { - b = "line", - r = "line", - sb = "line", - sr = "line", - rd = "line", - rh = "line", - cc = "line", - ccd = "line", - line = "line", - dash = "line", - arrow = "line", - c = "fixed", - cd = "fixed", - z = "text", - zt = "text", - zlt = "text", - zrt = "text", - rz = "text", - rt = "text", - lrt = "text", - rrt = "text", - zln = "number", - zrn = "number", - rn = "number", - lrn = "number", - rrn = "number", - zn = "number", - mov = "transform", - mark = "transform", - move = "transform", - off = "transform", - adj = "transform", - sub = "transform", + b = "line", + r = "line", + sb = "line", + sr = "line", + rd = "line", + rh = "line", + rb = "line", + rbd = "line", + cc = "line", + ccd = "line", + line = "line", + dash = "line", + arrow = "line", + c = "fixed", + cd = "fixed", + z = "text", + zt = "text", + zlt = "text", + zrt = "text", + rz = "text", + rt = "text", + lrt = "text", + rrt = "text", + label = "text", + zln = "number", + zrn = "number", + rn = "number", + lrn = "number", + rrn = "number", + zn = "number", + number = "number", + mov = "transform", + mark = "transform", + move = "transform", + diff = "transform", + off = "transform", + adj = "transform", + sub = "transform", } local front_keys = { @@ -89,6 +110,14 @@ local front_keys = { lr = "line", lsr = "line", rsr = "line", + lrd = "line", + rrd = "line", + lrh = "line", + rrh = "line", + lrbd = "line", + rrbd = "line", + lrb = "line", + rrb = "line", lrz = "text", rrz = "text", lsub = "transform", @@ 
-99,15 +128,14 @@ local one_keys = { db = "line", tb = "line", bb = "line", - rb = "line", dr = "line", hb = "line", bd = "line", bw = "line", oe = "line", sd = "line", - ld = "line", - rd = "line", + rdb = "line", + ldb = "line", ldd = "line", rdd = "line", ep = "line", @@ -130,9 +158,12 @@ local ring_keys = { rsr = "line", lrd = "line", rrd = "line", - rb = "line", lrb = "line", rrb = "line", + lrh = "line", + rrh = "line", + lrbd = "line", + rrbd = "line", dr = "line", eb = "line", er = "line", @@ -199,7 +230,10 @@ local syntax = { mp = { direct = '%s', arguments = 1 }, -- backdoor MP code - dangerous! } -local definitions = { } +chemistry.definitions = chemistry.definitions or { } +local definitions = chemistry.definitions + +storage.register("chemistry/definitions",definitions,"chemistry.definitions") function chemistry.undefine(name) definitions[lower(name)] = nil @@ -218,7 +252,7 @@ function chemistry.define(name,spec,text) } end -local metacode, variant, keys, max, txt, pstack, sstack +local metacode, variant, keys, max, txt, pstack, sstack, align local molecule = chemistry.molecule -- or use lpegmatch(chemistry.moleculeparser,...) 
local function fetch(txt) @@ -260,10 +294,10 @@ local special = (colon * C(other^1)) + Cc("") local text = (equal * C(P(1)^0)) + Cc(false) local pattern = - (amount + Cc(1)) * - (remapped + Cc("")) * - Cs(operation/lower) * - Cs(special/lower) * ( + (amount + Cc(1)) + * (remapped + Cc("")) + * Cs(operation/lower) + * Cs(special/lower) * ( range * Cc(false) * text + Cc(false) * Cc(false) * set * text + single * Cc(false) * Cc(false) * text + @@ -278,43 +312,88 @@ local pattern = -- print(lpegmatch(pattern,"RZ13=x")) -- 1 RZ false false table x local t_initialize = 'if unknown context_chem : input mp-chem.mpiv ; fi ;' -local t_start_structure = 'chem_start_structure(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s);' +local t_start_structure = 'chem_start_structure(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s);' local t_stop_structure = 'chem_stop_structure;' local t_start_component = 'chem_start_component;' local t_stop_component = 'chem_stop_component;' local t_line = 'chem_%s%s(%s,%s,%s,%s,%s);' local t_set = 'chem_set(%s);' -local t_number = 'chem_%s(%s,%s,"\\chemicaltext{%s}");' +local t_number = 'chem_%s%s(%s,%s,"\\chemicaltext{%s}");' local t_text = t_number local t_empty_normal = 'chem_%s(%s,%s,"");' local t_empty_center = 'chem_c%s(%s,%s,"");' local t_transform = 'chem_%s(%s,%s,%s);' -local function process(spec,text,n,rulethickness,rulecolor,offset) - insert(stack,{ spec=spec, text=text, n=n }) +local prepareMPvariable = commands and commands.prepareMPvariable + +local function process(level,spec,text,n,rulethickness,rulecolor,offset,default_variant) + insert(stack,{ spec = spec, text = text, n = n }) local txt = #stack local m = #metacode + local saved_rulethickness = rulethickness + local saved_rulecolor = rulecolor + local saved_align = align + local current_variant = default_variant or "six" for i=1,#spec do local step = spec[i] local s = lower(step) - local d = definitions[s] + local n = current_variant .. ":" .. 
s + local d = definitions[n] + if not d then + n = s + d = definitions[n] + end if d then if trace_structure then - report_chemistry("%s => definition: %s",step,s) + report_chemistry("%s > %s => definition: %s (%s snippets)",level,step,n,#d) end for i=1,#d do local di = d[i] - process(di.spec,di.text,1,rulethickness,rulecolor) -- offset? + current_variant = process(level+1,di.spec,di.text,1,rulethickness,rulecolor,offset,current_variant) -- offset? end else - --~local rep, operation, special, index, upto, set, text = lpegmatch(pattern,step) local factor, osign, operation, special, index, upto, set, text = lpegmatch(pattern,step) if trace_structure then local set = set and concat(set," ") or "-" - report_chemistry("%s => factor: %s, osign: %s operation: %s, special: %s, index: %s, upto: %s, set: %s, text: %s", - step,factor or "",osign or "",operation or "-",special and special ~= "" or "-",index or "-",upto or "-",set or "-",text or "-") + report_chemistry("%s > %s => factor: %s, osign: %s operation: %s, special: %s, index: %s, upto: %s, set: %s, text: %s", + level,step,factor or "",osign or "",operation or "-",special and special ~= "" or "-",index or "-",upto or "-",set or "-",text or "-") end - if operation == "pb" then + if operation == "rulecolor" then + local t = text + if not t then + txt, t = fetch(txt) + end + if t == v_default or t == v_normal or t == "" then + rulecolor = saved_rulecolor + elseif t then + rulecolor = mpnamedcolor(t) + end + elseif operation == "rulethickness" then + local t = text + if not t then + txt, t = fetch(txt) + end + if t == v_default or t == v_normal or t == t_medium or t == "" then + rulethickness = saved_rulethickness + elseif t == v_small then + rulethickness = topoints(1/1.2 * todimen(saved_rulethickness)) + elseif t == v_big then + rulethickness = topoints(1.2 * todimen(saved_rulethickness)) + elseif t then + -- rulethickness = topoints(todimen(t)) -- mp can't handle sp + rulethickness = topoints(tonumber(t) * 
todimen(saved_rulethickness)) + end + elseif operation == "symalign" then + local t = text + if not t then + txt, t = fetch(txt) + end + if t == v_default or t == v_normal then + align = saved_align + elseif t and t ~= "" then + align = "." .. t + end + elseif operation == "pb" then insert(pstack,variant) m = m + 1 ; metacode[m] = syntax.pb.direct if keys[special] == "text" and index then @@ -330,6 +409,7 @@ local function process(spec,text,n,rulethickness,rulecolor,offset) keys, max = ss.keys, ss.max m = m + 1 ; metacode[m] = syntax[operation].direct m = m + 1 ; metacode[m] = format(t_set,variant) + current_variant = variant elseif operation == "save" then insert(sstack,variant) m = m + 1 ; metacode[m] = syntax.save.direct @@ -339,8 +419,52 @@ local function process(spec,text,n,rulethickness,rulecolor,offset) keys, max = ss.keys, ss.max m = m + 1 ; metacode[m] = syntax[operation].direct m = m + 1 ; metacode[m] = format(t_set,variant) + current_variant = variant elseif operation then local ss = syntax[operation] + local what = keys[operation] + local ns = 0 + if set then + local sv = syntax[current_variant] + local ms = sv and sv.max + set = unique(set) + ns = #set + if directive_strictorder then + if what == "line" then + set = sorted(set) + end + if directive_strictindex and ms then + for i=ns,1,-1 do + local si = set[i] + if si > ms then + report_chemistry("%s > operation %s: limited to %s steps, ignoring %s", + level,operation,ms,si) + set[i] = nil + ns = ns - 1 + else + break + end + end + end + else + if directive_strictindex and ms then + local t, nt = { }, 0 + for i=1,ns do + local si = set[i] + if si > ms then + report_chemistry("%s > operation %s: limited to %s steps, ignoring %s", + level,operation,ms,si) + set[i] = nil + else + nt = nt + 1 + t[nt] = si + end + end + ns = nt + set = t + end + end + end if ss then local ds = ss.direct if ds then @@ -348,7 +472,7 @@ local function process(spec,text,n,rulethickness,rulecolor,offset) if sa == 1 then local 
one ; txt, one = fetch(txt) m = m + 1 ; metacode[m] = format(ds,one or "") - elseif sa ==2 then + elseif sa == 2 then local one ; txt, one = fetch(txt) local two ; txt, two = fetch(txt) m = m + 1 ; metacode[m] = format(ds,one or "",two or "") @@ -358,118 +482,120 @@ local function process(spec,text,n,rulethickness,rulecolor,offset) elseif ss.keys then variant, keys, max = s, ss.keys, ss.max m = m + 1 ; metacode[m] = format(t_set,variant) + current_variant = variant end - else - local what = keys[operation] - if what == "line" then - local s = osign - if s ~= "" then s = "." .. s end - if set then - -- condense consecutive numbers in a set to a range - -- (numbers modulo max are currently not dealt with...) - table.sort(set) - local sf, st = set[1] - for i=1,#set do - if i > 1 and set[i] ~= set[i-1]+1 then - m = m + 1 ; metacode[m] = format(t_line,operation,s,variant,sf,st,rulethickness,rulecolor) - sf = set[i] - end - st = set[i] + elseif what == "line" then + local s = osign + if s ~= "" then + s = "." .. 
s + end + if set then + -- condense consecutive numbers in a set to a range + local sf, st = set[1] + for i=1,ns do + if i > 1 and set[i] ~= set[i-1]+1 then + m = m + 1 ; metacode[m] = format(t_line,operation,s,variant,sf,st,rulethickness,rulecolor) + sf = set[i] end - m = m + 1 ; metacode[m] = format(t_line,operation,s,variant,sf,st,rulethickness,rulecolor) - elseif upto then - m = m + 1 ; metacode[m] = format(t_line,operation,s,variant,index,upto,rulethickness,rulecolor) - elseif index then - m = m + 1 ; metacode[m] = format(t_line,operation,s,variant,index,index,rulethickness,rulecolor) - else - m = m + 1 ; metacode[m] = format(t_line,operation,s,variant,1,max,rulethickness,rulecolor) + st = set[i] end - elseif what == "number" then - if set then - for i=1,#set do - local si = set[i] - m = m + 1 ; metacode[m] = format(t_number,operation,variant,si,si) - end - elseif upto then - for i=index,upto do - local si = set[i] - m = m + 1 ; metacode[m] = format(t_number,operation,variant,si,si) - end - elseif index then - m = m + 1 ; metacode[m] = format(t_number,operation,variant,index,index) - else - for i=1,max do - m = m + 1 ; metacode[m] = format(t_number,operation,variant,i,i) - end + m = m + 1 ; metacode[m] = format(t_line,operation,s,variant,sf,st,rulethickness,rulecolor) + elseif upto then + m = m + 1 ; metacode[m] = format(t_line,operation,s,variant,index,upto,rulethickness,rulecolor) + elseif index then + m = m + 1 ; metacode[m] = format(t_line,operation,s,variant,index,index,rulethickness,rulecolor) + else + m = m + 1 ; metacode[m] = format(t_line,operation,s,variant,1,max,rulethickness,rulecolor) + end + elseif what == "number" then + if set then + for i=1,ns do + local si = set[i] + m = m + 1 ; metacode[m] = format(t_number,operation,align,variant,si,si) end - elseif what == "text" then - if set then - for i=1,#set do - local si = set[i] - local t = text - if not t then txt, t = fetch(txt) end - if t then - t = molecule(processor_tostring(t)) - m = m + 1 ; 
metacode[m] = format(t_text,operation,variant,si,t) - end - end - elseif upto then - for i=index,upto do - local t = text - if not t then txt, t = fetch(txt) end - if t then - t = molecule(processor_tostring(t)) - m = m + 1 ; metacode[m] = format(t_text,operation,variant,i,t) - end - end - elseif index == 0 then + elseif upto then + for i=index,upto do + local si = set[i] + m = m + 1 ; metacode[m] = format(t_number,operation,align,variant,si,si) + end + elseif index then + m = m + 1 ; metacode[m] = format(t_number,operation,align,variant,index,index) + else + for i=1,max do + m = m + 1 ; metacode[m] = format(t_number,operation,align,variant,i,i) + end + end + elseif what == "text" then + if set then + for i=1,ns do + local si = set[i] local t = text if not t then txt, t = fetch(txt) end if t then t = molecule(processor_tostring(t)) - m = m + 1 ; metacode[m] = format(t_text,operation,variant,index,t) + m = m + 1 ; metacode[m] = format(t_text,operation,align,variant,si,t) end - elseif index then + end + elseif upto then + for i=index,upto do local t = text if not t then txt, t = fetch(txt) end if t then t = molecule(processor_tostring(t)) - m = m + 1 ; metacode[m] = format(t_text,operation,variant,index,t) - end - else - for i=1,max do - local t = text - if not t then txt, t = fetch(txt) end - if t then - t = molecule(processor_tostring(t)) - m = m + 1 ; metacode[m] = format(t_text,operation,variant,i,t) - end + m = m + 1 ; metacode[m] = format(t_text,operation,align,variant,i,t) end end - elseif what == "transform" then - if osign == "m" then factor = -factor end - if set then - for i=1,#set do - local si = set[i] - m = m + 1 ; metacode[m] = format(t_transform,operation,variant,si,factor) - end - elseif upto then - for i=index,upto do - m = m + 1 ; metacode[m] = format(t_transform,operation,variant,i,factor) + elseif index == 0 then + local t = text + if not t then txt, t = fetch(txt) end + if t then + t = molecule(processor_tostring(t)) + m = m + 1 ; metacode[m] = 
format(t_text,operation,align,variant,index,t) + end + elseif index then + local t = text + if not t then txt, t = fetch(txt) end + if t then + t = molecule(processor_tostring(t)) + m = m + 1 ; metacode[m] = format(t_text,operation,align,variant,index,t) + end + else + for i=1,max do + local t = text + if not t then txt, t = fetch(txt) end + if t then + t = molecule(processor_tostring(t)) + m = m + 1 ; metacode[m] = format(t_text,operation,align,variant,i,t) end - else - m = m + 1 ; metacode[m] = format(t_transform,operation,variant,index or 1,factor) end - elseif what == "fixed" then - m = m + 1 ; metacode[m] = format(t_transform,operation,variant,rulethickness,rulecolor) - elseif trace_structure then - report_chemistry("warning: undefined operation %s ignored here", operation or "") end + elseif what == "transform" then + if osign == "m" then + factor = -factor + end + if set then + for i=1,ns do + local si = set[i] + m = m + 1 ; metacode[m] = format(t_transform,operation,variant,si,factor) + end + elseif upto then + for i=index,upto do + m = m + 1 ; metacode[m] = format(t_transform,operation,variant,i,factor) + end + else + m = m + 1 ; metacode[m] = format(t_transform,operation,variant,index or 1,factor) + end + elseif what == "fixed" then + m = m + 1 ; metacode[m] = format(t_transform,operation,variant,rulethickness,rulecolor) + elseif trace_structure then + report_chemistry("%s > warning: undefined operation %s ignored here", + level, operation or "") end end end end remove(stack) + return current_variant end -- the size related values are somewhat special but we want to be @@ -480,19 +606,22 @@ end function chemistry.start(settings) chemistry.structures = chemistry.structures + 1 local emwidth, rulethickness, rulecolor, axiscolor = settings.emwidth, settings.rulethickness, settings.rulecolor, settings.framecolor - local width, height, scale, offset = settings.width or 0, settings.height or 0, settings.scale or "normal", settings.offset or 0 + local width, 
height, scale, rotation, offset = settings.width or 0, settings.height or 0, settings.scale or "normal", settings.rotation or 0, settings.offset or 0 local l, r, t, b = settings.left or 0, settings.right or 0, settings.top or 0, settings.bottom or 0 -- metacode = { } -- + align = settings.symalign or "auto" if trace_structure then - report_chemistry("scale: %s, width: %s, height: %s, l: %s, r: %s, t: %s, b: %s", scale, width, height, l, r, t, b) + report_chemistry("scale: %s, rotation: %s, width: %s, height: %s, l: %s, r: %s, t: %s, b: %s", scale, rotation, width, height, l, r, t, b) + report_chemistry("symalign: %s", align) end - if scale == variables.small then + if align ~= "" then align = "." .. align end + if scale == v_small then scale = 1/1.2 - elseif scale == variables.normal or scale == variables.medium or scale == 0 then + elseif scale == v_normal or scale == v_medium or scale == 0 then scale = 1 - elseif scale == variables.big then + elseif scale == v_big then scale = 1.2 else scale = tonumber(scale) @@ -504,7 +633,7 @@ function chemistry.start(settings) scale = .01 end end - if width == variables.fit then + if width == v_fit then width = true else width = tonumber(width) or 0 @@ -529,7 +658,7 @@ function chemistry.start(settings) end width = false end - if height == variables.fit then + if height == v_fit then height = true else height = tonumber(height) or 0 @@ -554,12 +683,13 @@ function chemistry.start(settings) end height = false end + rotation = tonumber(rotation) or 0 -- metacode[#metacode+1] = format(t_start_structure, chemistry.structures, - l, r, t, b, scale, + l, r, t, b, scale, rotation, tostring(width), tostring(height), tostring(emwidth), tostring(offset), - tostring(settings.axis == variables.on), tostring(rulethickness), tostring(axiscolor) + tostring(settings.axis == v_on), tostring(rulethickness), tostring(axiscolor) ) -- variant, keys, stack, pstack, sstack = "one", { }, { }, { }, { } @@ -590,7 +720,7 @@ function 
chemistry.component(spec,text,settings) local text = settings_to_array_with_repeat(text,true) -- inspect(spec) metacode[#metacode+1] = t_start_component - process(spec,text,1,rulethickness,rulecolor) -- offset? + process(1,spec,text,1,rulethickness,rulecolor) -- offset? metacode[#metacode+1] = t_stop_component end diff --git a/tex/context/base/chem-str.mkiv b/tex/context/base/chem-str.mkiv index 205675c46..cb840ed80 100644 --- a/tex/context/base/chem-str.mkiv +++ b/tex/context/base/chem-str.mkiv @@ -141,6 +141,7 @@ \def\chem_start[#1][#2]% {\ifmmode\vcenter\else\vbox\fi \bgroup + \synchronizestrut{\chemicalparameter\c!strut}% \dontcomplain \settrue\indisplaychemical \forgetall @@ -164,6 +165,8 @@ top = \chemicalparameter\c!top, bottom = \chemicalparameter\c!bottom, scale = "\chemicalparameter\c!scale", + rotation = "\chemicalparameter\c!rotation", + symalign = "\chemicalparameter\c!symalign", axis = "\chemicalparameter\c!axis", framecolor = "\MPcolor{\chemicalparameter\c!framecolor}", rulethickness = "\the\dimexpr\chemicalparameter\c!rulethickness\relax", @@ -358,13 +361,28 @@ % \let\chemicalsmashedleft \chemicalleftcentered % \let\chemicalsmashedright \chemicalrightcentered -\unexpanded\def\chemicalalignedtext#1#2#3% +\unexpanded\def\chemicalalignedtext + {\ifmmode + \expandafter\chem_aligned_text_math + \else + \expandafter\chem_aligned_text_text + \fi} + +\def\chem_aligned_text_text#1#2#3% {\dontleavehmode \begingroup \usechemicalstyleandcolor\c!style\c!color \hbox to \fontcharwd\font`C{\setstrut\strut#1\molecule{#3}#2}% \endgroup} +\def\chem_aligned_text_math#1#2#3% + {\dontleavehmode + \begingroup + \scratchcounter\normalmathstyle + \usechemicalstyleandcolor\c!style\c!color + \hbox to \fontcharwd\font`C{\setstrut\strut#1\mathematics{\tf\triggermathstyle\scratchcounter\molecule{#3}}#2}% + \endgroup} + \unexpanded\def\chemicalcentered {\chemicalalignedtext\hss \hss } \unexpanded\def\chemicalleftcentered {\chemicalalignedtext\relax\hss } 
\unexpanded\def\chemicalrightcentered{\chemicalalignedtext\hss \relax} @@ -436,18 +454,37 @@ % inline +% \unexpanded\def\chemical +% {\ifinformula +% \expandafter\displaychemical +% \else +% \expandafter\inlinechemical +% \fi} + \unexpanded\def\chemical {\ifinformula - \expandafter\displaychemical + \expandafter\indisplaychemical \else \expandafter\inlinechemical \fi} +\unexpanded\def\indisplaychemical + {\mathstylecommand\displaychemical\inlinechemical\inlinechemical} + +\unexpanded\def\inlinechemical#1% + {\dontleavehmode + \begingroup + \scratchcounter\normalmathstyle + \usechemicalstyleandcolor\c!style\c!color + \hbox{\mathematics{\tf\triggermathstyle\scratchcounter\ctxcommand{inlinechemical(\!!bs#1\!!es)}}}% + \endgroup} + \unexpanded\def\displaychemical {\dotriplegroupempty\chem_display} -\def\chem_display#1#2#3% todo: - {\the\everychemical \everychemical\emptytoks +\def\chem_display#1#2#3% + {\the\everychemical + \everychemical\emptytoks \quad \vcenter\bgroup \usechemicalstyleandcolor\c!style\c!color @@ -644,28 +681,50 @@ \c!offset=\v!overlay, \c!frame=\v!off] -\definecolor [lightblue] [h=add8e6] % a nice X11 color +\definecolor % private color + [chemicalframecolor] + [r=.75,g=.85,b=.95] \setupchemical [\c!frame=, \c!width=\v!fit, % or unitless number, multiplies scale*EmWidth \c!height=\v!fit, % or unitless number, multiplies scale*EmWidth - \c!left=0, % or unitless number, multiplies scale*EmWidth - \c!right=0, % or unitless number, multiplies scale*EmWidth - \c!top=0, % or unitless number, multiplies scale*EmWidth - \c!bottom=0, % or unitless number, multiplies scale*EmWidth + \c!left=0, % unitless number, multiplies scale*EmWidth + \c!right=0, % unitless number, multiplies scale*EmWidth + \c!top=0, % unitless number, multiplies scale*EmWidth + \c!bottom=0, % unitless number, multiplies scale*EmWidth \c!bodyfont=, - \c!scale=\v!normal, % small, normal or medium, big, unitless number (multiplies EmWidth) + \c!scale=\v!normal, % small, normal or medium, 
big, or unitless number (multiplies EmWidth) \c!size=\v!medium, \c!textsize=\v!big, % how is textsize used?? \c!axis=\v!off, \c!style=\rm, - \c!location=, + \c!rotation=0, % unitless number (interpreted as degrees) + \c!symalign=\v!auto, + \c!location=, % not yet used (was interaction related in mkii) \c!offset=.25em, \c!color=, - \c!framecolor=lightblue, + \c!strut=\v!yes, + \c!framecolor=chemicalframecolor, \c!rulethickness=0.6pt, %1.5\linewidth, \c!rulecolor=, \c!factor=1] % how is factor used?? +%D Compatibility: + +\definechemical[+R] {\chemical[RR]} +\definechemical[-R] {\chemical[LR]} + +\definechemical[CARBON:CB] {\chemical[NEWMANSTAGGER,C,SB]} +\definechemical[NEWMANSTAGGER:CB] {\chemical[NEWMANSTAGGER,C,SB]} +\definechemical[NEWMANECLIPSED:CB]{\chemical[NEWMANECLIPSED,C,SB]} +\definechemical[CARBON:CB1] {\chemical[CARBON,C,SB,Z234,1.5MOV1,MIR0,C,SB,Z234]} + +\definechemical[NEWMAN] {\chemical[]} +\definechemical[STAGGER] {\chemical[NEWMANSTAGGER]} +\definechemical[ECLIPSE] {\chemical[NEWMANECLIPSED]} +\definechemical[ECLIPSED] {\chemical[NEWMANECLIPSED]} +\definechemical[SIX:FRONT] {\chemical[SIXFRONT]} +\definechemical[FIVE:FRONT] {\chemical[FIVEFRONT]} + \protect \endinput diff --git a/tex/context/base/cldf-bas.mkiv b/tex/context/base/cldf-bas.mkiv index f2bd05177..f8b5b5d6a 100644 --- a/tex/context/base/cldf-bas.mkiv +++ b/tex/context/base/cldf-bas.mkiv @@ -14,5 +14,6 @@ \writestatus{loading}{ConTeXt Lua Documents / Basics} \registerctxluafile{cldf-bas}{1.001} +\registerctxluafile{cldf-prs}{1.001} \endinput diff --git a/tex/context/base/cldf-com.lua b/tex/context/base/cldf-com.lua index d9062594e..fa0dbed3e 100644 --- a/tex/context/base/cldf-com.lua +++ b/tex/context/base/cldf-com.lua @@ -17,9 +17,8 @@ generics.stoptabulate = "stoptabulate" -- "stop" .. variables.tabulate -- tod local NC, NR = context.NC, context.NR local function tabulaterow(how,...) - local t = { ... } - for i=1,#t do - local ti = tostring(t[i]) + for i=1,select("#",...) 
do + local ti = tostring(select(i,...)) NC() if how then context[how](ti) diff --git a/tex/context/base/cldf-ini.lua b/tex/context/base/cldf-ini.lua index 84ae7314e..b045282b1 100644 --- a/tex/context/base/cldf-ini.lua +++ b/tex/context/base/cldf-ini.lua @@ -25,10 +25,10 @@ local tex = tex context = context or { } local context = context -local format, find, gmatch, gsub, validstring = string.format, string.find, string.gmatch, string.gsub, string.valid +local format, gsub, validstring = string.format, string.gsub, string.valid local next, type, tostring, tonumber, setmetatable = next, type, tostring, tonumber, setmetatable local insert, remove, concat = table.insert, table.remove, table.concat -local lpegmatch, lpegC, lpegS, lpegP, lpegCc = lpeg.match, lpeg.C, lpeg.S, lpeg.P, lpeg.Cc +local lpegmatch, lpegC, lpegS, lpegP, lpegCc, patterns = lpeg.match, lpeg.C, lpeg.S, lpeg.P, lpeg.Cc, lpeg.patterns local texsprint = tex.sprint local textprint = tex.tprint @@ -162,8 +162,8 @@ context.popcatcodes = popcatcodes --~ content / texsprint --~ )^0 -local newline = lpeg.patterns.newline -local space = lpeg.patterns.spacer +local newline = patterns.newline +local space = patterns.spacer local spacing = newline * space^0 local content = lpegC((1-spacing)^1) -- texsprint local emptyline = space^0 * newline^2 -- texprint("") @@ -357,6 +357,8 @@ end -- -- -- +local containseol = patterns.containseol + local function writer(parent,command,first,...) -- already optimized before call local t = { first, ... } flush(currentcatcodes,command) -- todo: ctx|prt|texcatcodes @@ -377,7 +379,7 @@ local function writer(parent,command,first,...) -- already optimized before call flush(currentcatcodes,"{}") elseif typ == "string" then -- is processelines seen ? 
- if processlines and find(ti,"[\n\r]") then -- we can check for ti == "\n" + if processlines and lpegmatch(containseol,ti) then flush(currentcatcodes,"{") local flushlines = parent.__flushlines or flushlines flushlines(ti) @@ -529,7 +531,7 @@ local function caller(parent,f,a,...) if typ == "string" then if a then flush(contentcatcodes,format(f,a,...)) -- was currentcatcodes - elseif processlines and find(f,"[\n\r]") then + elseif processlines and lpegmatch(containseol,f) then local flushlines = parent.__flushlines or flushlines flushlines(f) else @@ -548,10 +550,9 @@ local function caller(parent,f,a,...) if f then if a ~= nil then local flushlines = parent.__flushlines or flushlines - flushlines(f) - -- ignore ... maybe some day + flushlines(a) else - flushdirect(currentcatcodes,"\r") + flushdirect(currentcatcodes,"\n") -- no \r, else issues with \startlines ... use context.par() otherwise end else if a ~= nil then @@ -635,6 +636,11 @@ local currenttrace = nil local nofwriters = 0 local nofflushes = 0 +local visualizer = lpeg.replacer { + { "\n","<>" }, + { "\r","<>" }, +} + statistics.register("traced context", function() if nofwriters > 0 or nofflushes > 0 then return format("writers: %s, flushes: %s, maxstack: %s",nofwriters,nofflushes,_n_f_) @@ -648,7 +654,7 @@ local tracedwriter = function(parent,...) -- also catcodes ? local t, n = { "w : - : " }, 1 local traced = function(normal,catcodes,...) -- todo: check for catcodes local s = concat({...}) - s = gsub(s,"\r","<>") -- unlikely + s = lpegmatch(visualizer,s) n = n + 1 t[n] = s normal(catcodes,...) @@ -676,7 +682,7 @@ local traced = function(normal,one,two,...) local argtype = type(argument) c = c + 1 if argtype == "string" then - collapsed[c] = gsub(argument,"\r","<>") + collapsed[c] = lpegmatch(visualizer,argument) elseif argtype == "number" then collapsed[c] = argument else @@ -689,7 +695,7 @@ local traced = function(normal,one,two,...) 
normal(one) local argtype = type(one) if argtype == "string" then - currenttrace(format("f : - : %s",gsub(one,"\r","<>"))) + currenttrace(format("f : - : %s",lpegmatch(visualizer,one))) elseif argtype == "number" then currenttrace(format("f : - : %s",one)) else diff --git a/tex/context/base/cldf-prs.lua b/tex/context/base/cldf-prs.lua new file mode 100644 index 000000000..42757be19 --- /dev/null +++ b/tex/context/base/cldf-prs.lua @@ -0,0 +1,52 @@ +if not modules then modules = { } end modules ['cldf-bas'] = { + version = 1.001, + comment = "companion to cldf-ini.mkiv", + author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", + copyright = "PRAGMA ADE / ConTeXt Development Team", + license = "see context related readme files" +} + +local lpegmatch, patterns = lpeg.match, lpeg.patterns +local P, R, V, Cc, Cs = lpeg.P, lpeg.R, lpeg.V, lpeg.Cc, lpeg.Cs +local format = string.format + +local cpatterns = patterns.context or { } +patterns.context = cpatterns + +local backslash = P("\\") +local csname = backslash * P(1) * (1-backslash)^0 +local sign = P("+") / "\\textplus " + + P("-") / "\\textminus " +local leftbrace = P("{") +local rightbrace = P("}") +local nested = P { leftbrace * (V(1) + (1-rightbrace))^0 * rightbrace } +local subscript = P("_") +local superscript = P("^") +local utf8char = patterns.utf8char +local cardinal = patterns.cardinal + +-- local content = Cs(csname + nested + sign^-1 * (csname + cardinal + utf8char)) +-- local lowfirst = subscript * ( Cc("\\lohi{%s}{%s}") * content * superscript + Cc("\\low{%s}" ) ) * content / format +-- local highfirst = superscript * ( Cc("\\hilo{%s}{%s}") * content * subscript + Cc("\\high{%s}") ) * content / format +-- local scripts = Cs(lowfirst + highfirst) + +local scripts = P { "start", + start = V("csname") + V("lowfirst") + V("highfirst"), + csname = csname, + content = Cs(V("csname") + nested + sign^-1 * (cardinal + utf8char)), + lowfirst = subscript * ( Cc("\\lohi{%s}{%s}") * V("content") * superscript + Cc("\\low{%s}" 
) ) * V("content") / format, + highfirst = superscript * ( Cc("\\hilo{%s}{%s}") * V("content") * subscript + Cc("\\high{%s}") ) * V("content") / format, + } + +local scripted = Cs((csname + scripts + utf8char)^0) + +cpatterns.scripts = scripts +cpatterns.csname = csname +cpatterns.scripted = scripted +cpatterns.nested = nested + +-- inspect(scripted) + +-- print(lpegmatch(scripted,"10^-3")) +-- print(lpegmatch(scripted,"10^-a")) + diff --git a/tex/context/base/colo-ini.lua b/tex/context/base/colo-ini.lua index 4fdbf4545..1a994b567 100644 --- a/tex/context/base/colo-ini.lua +++ b/tex/context/base/colo-ini.lua @@ -38,6 +38,8 @@ local attributes_list = attributes.list local colorvalues = colors.values local transparencyvalues = transparencies.values +local texattribute = tex.attribute + colors.sets = colors.sets or { } -- sets are mostly used for local colorsets = colors.sets -- showing lists of defined local colorset = { } -- colors @@ -503,12 +505,17 @@ local function mpcolor(model,ca,ta,default) end end +local function mpnamedcolor(name) + return mpcolor(texattribute[a_colorspace],l_color[name] or l_color.black) +end + local function mpoptions(model,ca,ta,default) -- will move to mlib-col return format("withcolor %s",mpcolor(model,ca,ta,default)) end -colors.mpcolor = mpcolor -colors.mpoptions = mpoptions +colors.mpcolor = mpcolor +colors.mpnamedcolor = mpnamedcolor +colors.mpoptions = mpoptions function colors.formatcolor(ca,separator) local cv = colorvalues[ca] diff --git a/tex/context/base/cont-new.mkii b/tex/context/base/cont-new.mkii index 3f5956611..e5cdd0410 100644 --- a/tex/context/base/cont-new.mkii +++ b/tex/context/base/cont-new.mkii @@ -11,7 +11,7 @@ %C therefore copyrighted by \PRAGMA. See mreadme.pdf for %C details. 
-\newcontextversion{2012.12.10 23:20} +\newcontextversion{2012.12.24 20:08} %D This file is loaded at runtime, thereby providing an %D excellent place for hacks, patches, extensions and new diff --git a/tex/context/base/cont-new.mkiv b/tex/context/base/cont-new.mkiv index e67181585..4e62d04f2 100644 --- a/tex/context/base/cont-new.mkiv +++ b/tex/context/base/cont-new.mkiv @@ -11,7 +11,7 @@ %C therefore copyrighted by \PRAGMA. See mreadme.pdf for %C details. -\newcontextversion{2012.12.10 23:20} +\newcontextversion{2012.12.24 20:08} %D This file is loaded at runtime, thereby providing an excellent place for %D hacks, patches, extensions and new features. diff --git a/tex/context/base/cont-yes.mkiv b/tex/context/base/cont-yes.mkiv index 0b7fd621e..5eae3eecb 100644 --- a/tex/context/base/cont-yes.mkiv +++ b/tex/context/base/cont-yes.mkiv @@ -20,6 +20,8 @@ -- When a style is loaded there is a good change that we never enter -- this code. + local report = logs.reporter("system") + environment.initializefilenames() -- todo: check if we really need to pre-prep the filename local arguments = environment.arguments @@ -32,12 +34,16 @@ -- can be part of (any) loaded (sub) file. The \starttext -- wrapping might go away. + report("processing as xml: %s",filename) + context.starttext() context.xmlprocess("main",filename,"") context.stoptext() elseif suffix == "cld" or arguments.forcecld then + report("processing as cld: %s",filename) + context.runfile(filename) elseif suffix == "lua" or arguments.forcelua then @@ -45,12 +51,16 @@ -- The wrapping might go away. Why is is it there in the -- first place. 
+ report("processing as lua: %s",filename) + context.starttext() context.ctxlua(string.format('dofile("%s")',filename)) context.stoptext() elseif suffix == "mp" or arguments.forcemp then + report("processing as metapost: %s",filename) + context.starttext() context.processMPfigurefile(filename) context.stoptext() @@ -66,6 +76,7 @@ else + -- \writestatus{system}{processing as tex} -- We have a regular tex file so no \starttext yet as we can -- load fonts. diff --git a/tex/context/base/context-version.pdf b/tex/context/base/context-version.pdf index 73f23e3ab..4feb1ffe5 100644 Binary files a/tex/context/base/context-version.pdf and b/tex/context/base/context-version.pdf differ diff --git a/tex/context/base/context-version.png b/tex/context/base/context-version.png index fb3daf3d5..882ca4262 100644 Binary files a/tex/context/base/context-version.png and b/tex/context/base/context-version.png differ diff --git a/tex/context/base/context.mkii b/tex/context/base/context.mkii index dfe757926..d1b87a40f 100644 --- a/tex/context/base/context.mkii +++ b/tex/context/base/context.mkii @@ -20,7 +20,7 @@ %D your styles an modules. \edef\contextformat {\jobname} -\edef\contextversion{2012.12.10 23:20} +\edef\contextversion{2012.12.24 20:08} %D For those who want to use this: diff --git a/tex/context/base/context.mkiv b/tex/context/base/context.mkiv index e47b0e4f1..56e4b0dc1 100644 --- a/tex/context/base/context.mkiv +++ b/tex/context/base/context.mkiv @@ -25,7 +25,7 @@ %D up and the dependencies are more consistent. \edef\contextformat {\jobname} -\edef\contextversion{2012.12.10 23:20} +\edef\contextversion{2012.12.24 20:08} %D For those who want to use this: diff --git a/tex/context/base/core-con.lua b/tex/context/base/core-con.lua index 96f3a00ea..11558102e 100644 --- a/tex/context/base/core-con.lua +++ b/tex/context/base/core-con.lua @@ -16,8 +16,6 @@ slower but look nicer this way.

local command, context = commands, context -local utf = unicode.utf8 - local floor, date, time, concat = math.floor, os.date, os.time, table.concat local lower, format, rep, match = string.lower, string.format, string.rep, string.match local utfchar, utfbyte = utf.char, utf.byte diff --git a/tex/context/base/core-uti.lua b/tex/context/base/core-uti.lua index 31f13a0fc..5f00d6fff 100644 --- a/tex/context/base/core-uti.lua +++ b/tex/context/base/core-uti.lua @@ -135,6 +135,7 @@ local packlist = { local jobpacker = packers.new(packlist,1.01) job.pack = true +-- job.pack = false directives.register("job.pack",function(v) pack = v end) diff --git a/tex/context/base/data-env.lua b/tex/context/base/data-env.lua index f016881b5..2ee25120e 100644 --- a/tex/context/base/data-env.lua +++ b/tex/context/base/data-env.lua @@ -24,6 +24,8 @@ resolvers.suffixes = suffixes resolvers.dangerous = dangerous resolvers.suffixmap = suffixmap +local luasuffixes = utilities.lua.suffixes + local relations = allocate { -- todo: handlers also here core = { ofm = { -- will become obsolete @@ -109,7 +111,7 @@ local relations = allocate { -- todo: handlers also here lua = { names = { "lua" }, variable = 'LUAINPUTS', - suffixes = { 'lua', 'luc', 'tma', 'tmc' }, + suffixes = { luasuffixes.lua, luasuffixes.luc, luasuffixes.tma, luasuffixes.tmc }, }, lib = { names = { "lib" }, diff --git a/tex/context/base/data-ini.lua b/tex/context/base/data-ini.lua index a952a29bd..37b4f62ca 100644 --- a/tex/context/base/data-ini.lua +++ b/tex/context/base/data-ini.lua @@ -7,7 +7,6 @@ if not modules then modules = { } end modules ['data-ini'] = { } local gsub, find, gmatch, char = string.gsub, string.find, string.gmatch, string.char -local concat = table.concat local next, type = next, type local filedirname, filebasename, filejoin = file.dirname, file.basename, file.join @@ -98,6 +97,10 @@ do local args = environment.originalarguments or arg -- this needs a cleanup + if not environment.ownmain then + 
environment.ownmain = status and string.match(string.lower(status.banner),"this is ([%a]+)") or "luatex" + end + local ownbin = environment.ownbin or args[-2] or arg[-2] or args[-1] or arg[-1] or arg[0] or "luatex" local ownpath = environment.ownpath or os.selfdir @@ -214,19 +217,6 @@ end environment.texroot = file.collapsepath(texroot) --- Tracing. Todo ... - -function resolvers.settrace(n) -- no longer number but: 'locating' or 'detail' - if n then - trackers.disable("resolvers.*") - trackers.enable("resolvers."..n) - end -end - -resolvers.settrace(osgetenv("MTX_INPUT_TRACE")) - --- todo: - if profiler then directives.register("system.profile",function() profiler.start("luatex-profile.log") diff --git a/tex/context/base/data-lua.lua b/tex/context/base/data-lua.lua index c63851525..de20f4820 100644 --- a/tex/context/base/data-lua.lua +++ b/tex/context/base/data-lua.lua @@ -14,6 +14,8 @@ if not modules then modules = { } end modules ['data-lua'] = { -- -- local mylib = require("libtest") -- -- local mysql = require("luasql.mysql") +local searchers = package.searchers or package.loaders + local concat = table.concat local trace_libraries = false @@ -115,13 +117,9 @@ function package.extraclibpath(...) end end -if not package.loaders then - package.loaders = package.searchers -- 5.2 -end - -if not package.loaders[-2] then +if not searchers[-2] then -- use package-path and package-cpath - package.loaders[-2] = package.loaders[2] + searchers[-2] = searchers[2] end local function loadedaslib(resolved,rawname) @@ -132,7 +130,7 @@ local function loadedbylua(name) if trace_libraries then report_libraries("! 
locating %q using normal loader",name) end - local resolved = package.loaders[-2](name) + local resolved = searchers[-2](name) end local function loadedbyformat(name,rawname,suffixes,islib) @@ -187,7 +185,7 @@ local function notloaded(name) end end -package.loaders[2] = function(name) +searchers[2] = function(name) local thename = gsub(name,"%.","/") local luaname = file.addsuffix(thename,"lua") local libname = file.addsuffix(thename,os.libsuffix) @@ -201,7 +199,7 @@ package.loaders[2] = function(name) or notloaded (name) end --- package.loaders[3] = nil --- package.loaders[4] = nil +-- searchers[3] = nil +-- searchers[4] = nil resolvers.loadlualib = require diff --git a/tex/context/base/data-pre.lua b/tex/context/base/data-pre.lua index 8067355d3..e48a5aa8c 100644 --- a/tex/context/base/data-pre.lua +++ b/tex/context/base/data-pre.lua @@ -19,10 +19,9 @@ local resolvers = resolvers local prefixes = utilities.storage.allocate() resolvers.prefixes = prefixes -local gsub = string.gsub local cleanpath, findgivenfile, expansion = resolvers.cleanpath, resolvers.findgivenfile, resolvers.expansion local getenv = resolvers.getenv -- we can probably also use resolvers.expansion -local P, Cs, lpegmatch = lpeg.P, lpeg.Cs, lpeg.match +local P, S, R, C, Cs, lpegmatch = lpeg.P, lpeg.S, lpeg.R, lpeg.C, lpeg.Cs, lpeg.match local joinpath, basename, dirname = file.join, file.basename, file.dirname local getmetatable, rawset, type = getmetatable, rawset, type @@ -144,6 +143,28 @@ end -- todo: use an lpeg (see data-lua for !! 
/ stripper) +-- local function resolve(str) -- use schemes, this one is then for the commandline only +-- if type(str) == "table" then +-- local t = { } +-- for i=1,#str do +-- t[i] = resolve(str[i]) +-- end +-- return t +-- else +-- local res = resolved[str] +-- if not res then +-- res = gsub(str,"([a-z][a-z]+):([^ \"\';,]*)",_resolve_) -- home:xx;selfautoparent:xx; etc (comma added) +-- resolved[str] = res +-- abstract[res] = str +-- end +-- return res +-- end +-- end + +-- home:xx;selfautoparent:xx; + +local pattern = Cs((C(R("az")^2) * P(":") * C((1-S(" \"\';,"))^1) / _resolve_ + P(1))^0) + local function resolve(str) -- use schemes, this one is then for the commandline only if type(str) == "table" then local t = { } @@ -154,7 +175,7 @@ local function resolve(str) -- use schemes, this one is then for the commandline else local res = resolved[str] if not res then - res = gsub(str,"([a-z][a-z]+):([^ \"\';,]*)",_resolve_) -- home:xx;selfautoparent:xx; etc (comma added) + res = lpegmatch(pattern,str) resolved[str] = res abstract[res] = str end diff --git a/tex/context/base/data-res.lua b/tex/context/base/data-res.lua index 25aaa6c35..41a590228 100644 --- a/tex/context/base/data-res.lua +++ b/tex/context/base/data-res.lua @@ -31,6 +31,7 @@ local joinpath = file.joinpath local allocate = utilities.storage.allocate local settings_to_array = utilities.parsers.settings_to_array local setmetatableindex = table.setmetatableindex +local luasuffixes = utilities.lua.suffixes local trace_locating = false trackers.register("resolvers.locating", function(v) trace_locating = v end) local trace_detail = false trackers.register("resolvers.details", function(v) trace_detail = v end) @@ -1607,15 +1608,19 @@ function resolvers.dowithvariable(name,func) end function resolvers.locateformat(name) - local barename = file.removesuffix(name) -- gsub(name,"%.%a+$","") - local fmtname = caches.getfirstreadablefile(barename..".fmt","formats") or "" + local engine = environment.ownmain or 
"luatex" + local barename = file.removesuffix(name) + local fullname = file.addsuffix(barename,"fmt") + local fmtname = caches.getfirstreadablefile(fullname,"formats",engine) or "" if fmtname == "" then - fmtname = resolvers.findfile(barename..".fmt") + fmtname = resolvers.findfile(fullname) fmtname = resolvers.cleanpath(fmtname) end if fmtname ~= "" then local barename = file.removesuffix(fmtname) - local luaname, lucname, luiname = barename .. ".lua", barename .. ".luc", barename .. ".lui" + local luaname = file.addsuffix(barename,luasuffixes.lua) + local lucname = file.addsuffix(barename,luasuffixes.luc) + local luiname = file.addsuffix(barename,luasuffixes.lui) if lfs.isfile(luiname) then return barename, luiname elseif lfs.isfile(lucname) then diff --git a/tex/context/base/data-sch.lua b/tex/context/base/data-sch.lua index 569fa5c94..7eb254557 100644 --- a/tex/context/base/data-sch.lua +++ b/tex/context/base/data-sch.lua @@ -6,7 +6,7 @@ if not modules then modules = { } end modules ['data-sch'] = { license = "see context related readme files" } -local loadstring = loadstring +local load = load local gsub, concat, format = string.gsub, table.concat, string.format local finders, openers, loaders = resolvers.finders, resolvers.openers, resolvers.loaders @@ -192,7 +192,7 @@ schemes.fetchstring = fetchstring function schemes.fetchtable(url,data) local reply = fetchstring(url,data) if reply then - local s = loadstring("return " .. reply) + local s = load("return " .. 
reply) if s then return s() end diff --git a/tex/context/base/data-tex.lua b/tex/context/base/data-tex.lua index 639bb0ac9..f19b53407 100644 --- a/tex/context/base/data-tex.lua +++ b/tex/context/base/data-tex.lua @@ -18,7 +18,7 @@ local resolvers = resolvers local sequencers = utilities.sequencers local methodhandler = resolvers.methodhandler local splitlines = string.splitlines -local utffiletype = unicode.filetype +local utffiletype = utf.filetype -- local fileprocessor = nil -- local lineprocessor = nil @@ -75,13 +75,13 @@ function helpers.textopener(tag,filename,filehandle,coding) report_tex("%s opener, '%s' opened using method '%s'",tag,filename,coding) end if coding == "utf-16-be" then - lines = unicode.utf16_to_utf8_be(lines) + lines = utf.utf16_to_utf8_be(lines) elseif coding == "utf-16-le" then - lines = unicode.utf16_to_utf8_le(lines) + lines = utf.utf16_to_utf8_le(lines) elseif coding == "utf-32-be" then - lines = unicode.utf32_to_utf8_be(lines) + lines = utf.utf32_to_utf8_be(lines) elseif coding == "utf-32-le" then - lines = unicode.utf32_to_utf8_le(lines) + lines = utf.utf32_to_utf8_le(lines) else -- utf8 or unknown (could be a mkvi file) local runner = textfileactions.runner if runner then diff --git a/tex/context/base/data-tmp.lua b/tex/context/base/data-tmp.lua index f7e64895b..c71002f2b 100644 --- a/tex/context/base/data-tmp.lua +++ b/tex/context/base/data-tmp.lua @@ -25,6 +25,7 @@ luatools with a recache feature.

local format, lower, gsub, concat = string.format, string.lower, string.gsub, table.concat local serialize, serializetofile = table.serialize, table.tofile local mkdirs, isdir = dir.mkdirs, lfs.isdir +local addsuffix, is_writable, is_readable = file.addsuffix, file.is_writable, file.is_readable local trace_locating = false trackers.register("resolvers.locating", function(v) trace_locating = v end) local trace_cache = false trackers.register("resolvers.cache", function(v) trace_cache = v end) @@ -49,8 +50,10 @@ end -- end of intermezzo -caches = caches or { } -local caches = caches +caches = caches or { } +local caches = caches + +local luasuffixes = utilities.lua.suffixes caches.base = caches.base or "luatex-cache" caches.more = caches.more or "context" @@ -78,18 +81,18 @@ local function identify() cachepath = file.collapsepath(cachepath) local valid = isdir(cachepath) if valid then - if file.is_readable(cachepath) then + if is_readable(cachepath) then readables[#readables+1] = cachepath - if not writable and file.is_writable(cachepath) then + if not writable and is_writable(cachepath) then writable = cachepath end end elseif not writable and caches.force then local cacheparent = file.dirname(cachepath) - if file.is_writable(cacheparent) and true then -- we go on anyway (needed for mojca's kind of paths) + if is_writable(cacheparent) and true then -- we go on anyway (needed for mojca's kind of paths) if not caches.ask or io.ask(format("\nShould I create the cache path %s?",cachepath), "no", { "yes", "no" }) == "yes" then mkdirs(cachepath) - if isdir(cachepath) and file.is_writable(cachepath) then + if isdir(cachepath) and is_writable(cachepath) then report_caches("created: %s",cachepath) writable = cachepath readables[#readables+1] = cachepath @@ -111,8 +114,8 @@ local function identify() cachepath = resolvers.resolve(cachepath) cachepath = resolvers.cleanpath(cachepath) local valid = isdir(cachepath) - if valid and file.is_readable(cachepath) then - if not 
writable and file.is_writable(cachepath) then + if valid and is_readable(cachepath) then + if not writable and is_writable(cachepath) then readables[#readables+1] = cachepath writable = cachepath break @@ -201,7 +204,7 @@ end local r_cache, w_cache = { }, { } -- normally w in in r but who cares -local function getreadablepaths(...) -- we can optimize this as we have at most 2 tags +local function getreadablepaths(...) local tags = { ... } local hash = concat(tags,"/") local done = r_cache[hash] @@ -244,7 +247,7 @@ function caches.getfirstreadablefile(filename,...) for i=1,#rd do local path = rd[i] local fullname = file.join(path,filename) - if file.is_readable(fullname) then + if is_readable(fullname) then usedreadables[i] = true return fullname, path end @@ -265,7 +268,7 @@ function caches.define(category,subcategory) -- for old times sake end function caches.setluanames(path,name) - return path .. "/" .. name .. ".tma", path .. "/" .. name .. ".tmc" + return format("%s/%s.%s",path,name,luasuffixes.tma), format("%s/%s.%s",path,name,luasuffixes.tmc) end function caches.loaddata(readables,name) @@ -275,7 +278,13 @@ function caches.loaddata(readables,name) for i=1,#readables do local path = readables[i] local tmaname, tmcname = caches.setluanames(path,name) - local loader = loadfile(tmcname) or loadfile(tmaname) + local loader = loadfile(tmcname) + if not loader then + -- in case we have a different engine + utilities.lua.compile(tmaname,tmcname) + -- + loader = loadfile(tmaname) + end if loader then loader = loader() collectgarbage("step") @@ -287,11 +296,15 @@ end function caches.is_writable(filepath,filename) local tmaname, tmcname = caches.setluanames(filepath,filename) - return file.is_writable(tmaname) + return is_writable(tmaname) end local saveoptions = { compact = true } +-- add some point we will only use the internal bytecode compiler and +-- then we can flag success in the tma so that it can trigger a compile +-- if the other engine + function 
caches.savedata(filepath,filename,data,raw) local tmaname, tmcname = caches.setluanames(filepath,filename) local reduce, simplify = true, true @@ -317,9 +330,9 @@ end function caches.loadcontent(cachename,dataname) local name = caches.hashed(cachename) - local full, path = caches.getfirstreadablefile(name ..".lua","trees") + local full, path = caches.getfirstreadablefile(addsuffix(name,luasuffixes.lua),"trees") local filename = file.join(path,name) - local blob = loadfile(filename .. ".luc") or loadfile(filename .. ".lua") + local blob = loadfile(addsuffix(filename,luasuffixes.luc)) or loadfile(addsuffix(filename,luasuffixes.lua)) if blob then local data = blob() if data and data.content then @@ -354,9 +367,10 @@ end function caches.savecontent(cachename,dataname,content) local name = caches.hashed(cachename) - local full, path = caches.setfirstwritablefile(name ..".lua","trees") + local full, path = caches.setfirstwritablefile(addsuffix(name,luasuffixes.lua),"trees") local filename = file.join(path,name) -- is full - local luaname, lucname = filename .. ".lua", filename .. 
".luc" + local luaname = addsuffix(filename,luasuffixes.lua) + local lucname = addsuffix(filename,luasuffixes.luc) if trace_locating then report_resolvers("preparing '%s' for '%s'",dataname,cachename) end diff --git a/tex/context/base/data-use.lua b/tex/context/base/data-use.lua index d2a9b7571..fb6798900 100644 --- a/tex/context/base/data-use.lua +++ b/tex/context/base/data-use.lua @@ -59,7 +59,7 @@ statistics.register("used cache path", function() return caches.usedpaths() end function statistics.savefmtstatus(texname,formatbanner,sourcefile) -- texname == formatname local enginebanner = status.list().banner if formatbanner and enginebanner and sourcefile then - local luvname = file.replacesuffix(texname,"luv") + local luvname = file.replacesuffix(texname,"luv") -- utilities.lua.suffixes.luv local luvdata = { enginebanner = enginebanner, formatbanner = formatbanner, @@ -70,10 +70,14 @@ function statistics.savefmtstatus(texname,formatbanner,sourcefile) -- texname == end end +-- todo: check this at startup and return (say) 999 as signal that the run +-- was aborted due to a wrong format in which case mtx-context can trigger +-- a remake + function statistics.checkfmtstatus(texname) local enginebanner = status.list().banner if enginebanner and texname then - local luvname = file.replacesuffix(texname,"luv") + local luvname = file.replacesuffix(texname,"luv") -- utilities.lua.suffixes.luv if lfs.isfile(luvname) then local luv = dofile(luvname) if luv and luv.sourcefile then diff --git a/tex/context/base/file-job.lua b/tex/context/base/file-job.lua index fda4f27da..f05be5708 100644 --- a/tex/context/base/file-job.lua +++ b/tex/context/base/file-job.lua @@ -127,7 +127,7 @@ end -- moved from tex to lua: local texpatterns = { "%s.mkvi", "%s.mkiv", "%s.tex" } -local luapatterns = { "%s.luc", "%s.lua" } +local luapatterns = { "%s" .. 
utilities.lua.suffixes.luc, "%s.lua" } local cldpatterns = { "%s.cld" } local xmlpatterns = { "%s.xml" } diff --git a/tex/context/base/font-con.lua b/tex/context/base/font-con.lua index 77d7d3e5c..516dffe98 100644 --- a/tex/context/base/font-con.lua +++ b/tex/context/base/font-con.lua @@ -6,11 +6,8 @@ if not modules then modules = { } end modules ['font-con'] = { license = "see context related readme files" } - -- some names of table entries will be changed (no _) -local utf = unicode.utf8 - local next, tostring, rawget = next, tostring, rawget local format, match, lower, gsub = string.format, string.match, string.lower, string.gsub local utfbyte = utf.byte diff --git a/tex/context/base/font-def.lua b/tex/context/base/font-def.lua index 03037f1c6..8d4fd25fc 100644 --- a/tex/context/base/font-def.lua +++ b/tex/context/base/font-def.lua @@ -8,7 +8,6 @@ if not modules then modules = { } end modules ['font-def'] = { -- We can overload some of the definers.functions so we don't local them. -local concat = table.concat local format, gmatch, match, find, lower, gsub = string.format, string.gmatch, string.match, string.find, string.lower, string.gsub local tostring, next = tostring, next local lpegmatch = lpeg.match diff --git a/tex/context/base/font-ext.lua b/tex/context/base/font-ext.lua index 560cf23ee..e23abd701 100644 --- a/tex/context/base/font-ext.lua +++ b/tex/context/base/font-ext.lua @@ -6,7 +6,6 @@ if not modules then modules = { } end modules ['font-ext'] = { license = "see context related readme files" } -local utf = unicode.utf8 local next, type, byte = next, type, string.byte local gmatch, concat, format = string.gmatch, table.concat, string.format local utfchar = utf.char diff --git a/tex/context/base/font-ota.lua b/tex/context/base/font-ota.lua index 5b2c3f22a..11615ea8b 100644 --- a/tex/context/base/font-ota.lua +++ b/tex/context/base/font-ota.lua @@ -8,7 +8,7 @@ if not modules then modules = { } end modules ['font-ota'] = { -- this might become 
scrp-*.lua -local type, tostring, match, format, concat = type, tostring, string.match, string.format, table.concat +local type = type if not trackers then trackers = { register = function() end } end diff --git a/tex/context/base/font-otf.lua b/tex/context/base/font-otf.lua index 9f0fb6657..8e9930a54 100644 --- a/tex/context/base/font-otf.lua +++ b/tex/context/base/font-otf.lua @@ -14,8 +14,6 @@ if not modules then modules = { } end modules ['font-otf'] = { -- more checking against low level calls of functions -local utf = unicode.utf8 - local utfbyte = utf.byte local format, gmatch, gsub, find, match, lower, strip = string.format, string.gmatch, string.gsub, string.find, string.match, string.lower, string.strip local type, next, tonumber, tostring = type, next, tonumber, tostring @@ -455,32 +453,32 @@ function otf.load(filename,format,sub,featurefile) starttiming(data) report_otf("file size: %s", size) enhancers.apply(data,filename,fontdata) + local packtime = { } if packdata then if cleanup > 0 then collectgarbage("collect") ---~ lua.collectgarbage() end + starttiming(packtime) enhance("pack",data,filename,nil) + stoptiming(packtime) end report_otf("saving in cache: %s",filename) data = containers.write(otf.cache, hash, data) if cleanup > 1 then collectgarbage("collect") ---~ lua.collectgarbage() end stoptiming(data) if elapsedtime then -- not in generic - report_otf("preprocessing and caching took %s seconds",elapsedtime(data)) + report_otf("preprocessing and caching took %s seconds (packtime: %s)", + elapsedtime(data),packdata and elapsedtime(packtime) or 0) end fontloader.close(fontdata) -- free memory if cleanup > 3 then collectgarbage("collect") ---~ lua.collectgarbage() end data = containers.read(otf.cache, hash) -- this frees the old table and load the sparse one if cleanup > 2 then collectgarbage("collect") ---~ lua.collectgarbage() end else data = nil @@ -1058,7 +1056,10 @@ actions["reorganize subtables"] = function(data,filename,raw) -- local name = 
gk.name -- - if features then + if not name then + -- in fact an error + report_otf("skipping weird lookup number %s",k) + elseif features then -- scripts, tag, ismac local f = { } for i=1,#features do @@ -1491,6 +1492,9 @@ actions["merge kern classes"] = function(data,filename,raw) if type(lookups) ~= "table" then lookups = { lookups } end + -- if offsets[1] == nil then + -- offsets[1] = "" + -- end -- we can check the max in the loop -- local maxseconds = getn(seconds) for n, s in next, firsts do @@ -1511,9 +1515,9 @@ actions["merge kern classes"] = function(data,filename,raw) if splt then local extrakerns = { } local baseoffset = (fk-1) * maxseconds - -- for sk=2,maxseconds do - -- local sv = seconds[sk] - for sk, sv in next, seconds do + for sk=2,maxseconds do -- will become 1 based in future luatex + local sv = seconds[sk] + -- for sk, sv in next, seconds do local splt = split[sv] if splt then -- redundant test local offset = offsets[baseoffset + sk] diff --git a/tex/context/base/font-otn.lua b/tex/context/base/font-otn.lua index 71027d731..5dba663e4 100644 --- a/tex/context/base/font-otn.lua +++ b/tex/context/base/font-otn.lua @@ -28,6 +28,8 @@ if not modules then modules = { } end modules ['font-otn'] = { -- handle positions (we need example fonts) -- handle gpos_single (we might want an extra width field in glyph nodes because adding kerns might interfere) -- mark (to mark) code is still not what it should be (too messy but we need some more extreem husayni tests) +-- remove some optimizations (when I have a faster machine) + --[[ldx--

This module is a bit more split up that I'd like but since we also want to test diff --git a/tex/context/base/font-otp.lua b/tex/context/base/font-otp.lua index 6c9827de8..fc98b2bdc 100644 --- a/tex/context/base/font-otp.lua +++ b/tex/context/base/font-otp.lua @@ -7,11 +7,14 @@ if not modules then modules = { } end modules ['font-otp'] = { } -- todo: pack math (but not that much to share) +-- pitfall 5.2: hashed tables can suddenly become indexed with nil slots local next, type = next, type local sort, concat = table.sort, table.concat +local trace_packing = false trackers.register("otf.packing", function(v) trace_packing = v end) local trace_loading = false trackers.register("otf.loading", function(v) trace_loading = v end) + local report_otf = logs.reporter("fonts","otf loading") -- also used in other scripts so we need to check some tables: @@ -33,28 +36,68 @@ otf.glists = glists local criterium = 1 local threshold = 0 -local function tabstr(t) - local s, n = { }, 0 +local function tabstr_normal(t) + local s = { } + local n = 0 for k, v in next, t do n = n + 1 if type(v) == "table" then - s[n] = k .. "={" .. tabstr(v) .. "}" + s[n] = k .. ">" .. tabstr_normal(v) elseif v == true then - s[n] = k .. "=true" + s[n] = k .. "+" -- "=true" elseif v then s[n] = k .. "=" .. v else - s[n] = k .. "=false" + s[n] = k .. "-" -- "=false" end end - if n == 1 then + if n == 0 then + return "" + elseif n == 1 then return s[1] else - sort(s) + sort(s) -- costly but needed (occasional wrong hit otherwise) return concat(s,",") end end +local function tabstr_flat(t) + local s = { } + local n = 0 + for k, v in next, t do + n = n + 1 + s[n] = k .. "=" .. v + end + if n == 0 then + return "" + elseif n == 1 then + return s[1] + else + sort(s) -- costly but needed (occasional wrong hit otherwise) + return concat(s,",") + end +end + +local function tabstr_boolean(t) + local s = { } + local n = 0 + for k, v in next, t do + n = n + 1 + if v then + s[n] = k .. "+" + else + s[n] = k .. 
"-" + end + end + if n == 0 then + return "" + elseif n == 1 then + return s[1] + else + sort(s) -- costly but needed (occasional wrong hit otherwise) + return concat(s,",") + end +end -- -- saves only a few tens of bytes -- @@ -74,41 +117,83 @@ local function packdata(data) local h, t, c = { }, { }, { } local hh, tt, cc = { }, { }, { } local nt, ntt = 0, 0 - local function pack_1(v,indexed) - -- v == table - local tag = indexed and concat(v," ") or tabstr(v) + local function pack_normal(v) + local tag = tabstr_normal(v,flat) + local ht = h[tag] + if ht then + c[ht] = c[ht] + 1 + return ht + else + nt = nt + 1 + t[nt] = v + h[tag] = nt + c[nt] = 1 + return nt + end + end + local function pack_flat(v) + local tag = tabstr_flat(v) local ht = h[tag] - if not ht then + if ht then + c[ht] = c[ht] + 1 + return ht + else nt = nt + 1 - ht = nt - t[ht] = v - h[tag] = ht - c[ht] = 1 + t[nt] = v + h[tag] = nt + c[nt] = 1 + return nt + end + end + local function pack_boolean(v) + local tag = tabstr_boolean(v) + local ht = h[tag] + if ht then + c[ht] = c[ht] + 1 + return ht else + nt = nt + 1 + t[nt] = v + h[tag] = nt + c[nt] = 1 + return nt + end + end + local function pack_indexed(v) + local tag = concat(v," ") + local ht = h[tag] + if ht then c[ht] = c[ht] + 1 + return ht + else + nt = nt + 1 + t[nt] = v + h[tag] = nt + c[nt] = 1 + return nt end - return ht end - local function pack_2(v,indexed) + local function pack_final(v) -- v == number if c[v] <= criterium then return t[v] else -- compact hash local hv = hh[v] - if not hv then + if hv then + return hv + else ntt = ntt + 1 - hv = ntt - tt[hv] = t[v] - hh[v] = hv - cc[hv] = c[v] + tt[ntt] = t[v] + hh[v] = ntt + cc[ntt] = c[v] + return ntt end - return hv end end local function success(stage,pass) if nt == 0 then - if trace_loading then + if trace_loading or trace_packing then report_otf("pack quality: nothing to pack") end return false @@ -136,35 +221,45 @@ local function packdata(data) end data.tables = tt end - if 
trace_loading then + if trace_loading or trace_packing then report_otf("pack quality: stage %s, pass %s, %s packed, 1-10:%s, 11-20:%s, rest:%s (criterium: %s)", stage, pass, one+two+rest, one, two, rest, criterium) end return true else - if trace_loading then + if trace_loading or trace_packing then report_otf("pack quality: stage %s, pass %s, %s packed, aborting pack (threshold: %s)", stage, pass, nt, threshold) end return false end end + local function packers(pass) + if pass == 1 then + return pack_normal, pack_indexed, pack_flat, pack_boolean + else + return pack_final, pack_final, pack_final, pack_final + end + end local resources = data.resources local lookuptypes = resources.lookuptypes for pass=1,2 do - local pack = (pass == 1 and pack_1) or pack_2 + if trace_packing then + report_otf("start packing: stage 1, pass %s",pass) + end + local pack_normal, pack_indexed, pack_flat, pack_boolean = packers(pass) for unicode, description in next, data.descriptions do local boundingbox = description.boundingbox if boundingbox then - description.boundingbox = pack(boundingbox,true) + description.boundingbox = pack_indexed(boundingbox) end local slookups = description.slookups if slookups then for tag, slookup in next, slookups do local what = lookuptypes[tag] if what == "pair" then - local t = slookup[2] if t then slookup[2] = pack(t,true) end - local t = slookup[3] if t then slookup[3] = pack(t,true) end + local t = slookup[2] if t then slookup[2] = pack_indexed(t) end + local t = slookup[3] if t then slookup[3] = pack_indexed(t) end elseif what ~= "substitution" then - slookups[tag] = pack(slookup) + slookups[tag] = pack_indexed(slookup) -- true is new end end end @@ -175,12 +270,12 @@ local function packdata(data) if what == "pair" then for i=1,#mlookup do local lookup = mlookup[i] - local t = lookup[2] if t then lookup[2] = pack(t,true) end - local t = lookup[3] if t then lookup[3] = pack(t,true) end + local t = lookup[2] if t then lookup[2] = pack_indexed(t) end + 
local t = lookup[3] if t then lookup[3] = pack_indexed(t) end end elseif what ~= "substitution" then for i=1,#mlookup do - mlookup[i] = pack(mlookup[i]) -- true + mlookup[i] = pack_indexed(mlookup[i]) -- true is new end end end @@ -188,7 +283,7 @@ local function packdata(data) local kerns = description.kerns if kerns then for tag, kern in next, kerns do - kerns[tag] = pack(kern) + kerns[tag] = pack_flat(kern) end end local math = description.math @@ -196,7 +291,7 @@ local function packdata(data) local kerns = math.kerns if kerns then for tag, kern in next, kerns do - kerns[tag] = pack(kern) + kerns[tag] = pack_normal(kern) end end end @@ -206,12 +301,14 @@ local function packdata(data) if what == "baselig" then for _, a in next, anchor do for k=1,#a do - a[k] = pack(a[k]) +-- a[k] = pack_normal(a[k]) + a[k] = pack_indexed(a[k]) end end else for k, v in next, anchor do - anchor[k] = pack(v) +-- anchor[k] = pack_normal(v) + anchor[k] = pack_indexed(v) end end end @@ -224,11 +321,11 @@ local function packdata(data) if rules then for i=1,#rules do -- was next loop local rule = rules[i] - local r = rule.before if r then for i=1,#r do r[i] = pack(r[i]) end end - local r = rule.after if r then for i=1,#r do r[i] = pack(r[i]) end end - local r = rule.current if r then for i=1,#r do r[i] = pack(r[i]) end end - local r = rule.replacements if r then rule.replacements = pack(r) end - local r = rule.lookups if r then rule.lookups = pack(r) end + local r = rule.before if r then for i=1,#r do r[i] = pack_boolean(r[i]) end end + local r = rule.after if r then for i=1,#r do r[i] = pack_boolean(r[i]) end end + local r = rule.current if r then for i=1,#r do r[i] = pack_boolean(r[i]) end end + local r = rule.replacements if r then rule.replacements = pack_boolean(r) end + local r = rule.lookups if r then rule.lookups = pack_boolean(r) end end end end @@ -236,13 +333,13 @@ local function packdata(data) local anchor_to_lookup = resources.anchor_to_lookup if anchor_to_lookup then for 
anchor, lookup in next, anchor_to_lookup do - anchor_to_lookup[anchor] = pack(lookup) + anchor_to_lookup[anchor] = pack_normal(lookup) end end local lookup_to_anchor = resources.lookup_to_anchor if lookup_to_anchor then for lookup, anchor in next, lookup_to_anchor do - lookup_to_anchor[lookup] = pack(anchor) + lookup_to_anchor[lookup] = pack_normal(anchor) end end local sequences = resources.sequences @@ -250,16 +347,16 @@ local function packdata(data) for feature, sequence in next, sequences do local flags = sequence.flags if flags then - sequence.flags = pack(flags) + sequence.flags = pack_normal(flags) end local subtables = sequence.subtables if subtables then - sequence.subtables = pack(subtables) + sequence.subtables = pack_normal(subtables) end local features = sequence.features if features then for script, feature in next, features do - features[script] = pack(feature) + features[script] = pack_normal(feature) end end end @@ -269,11 +366,11 @@ local function packdata(data) for name, lookup in next, lookups do local flags = lookup.flags if flags then - lookup.flags = pack(flags) + lookup.flags = pack_normal(flags) end local subtables = lookup.subtables if subtables then - lookup.subtables = pack(subtables) + lookup.subtables = pack_normal(subtables) end end end @@ -283,7 +380,7 @@ local function packdata(data) local list = features[what] if list then for feature, spec in next, list do - list[feature] = pack(spec) + list[feature] = pack_normal(spec) end end end @@ -294,27 +391,30 @@ local function packdata(data) end if nt > 0 then for pass=1,2 do - local pack = (pass == 1 and pack_1) or pack_2 + if trace_packing then + report_otf("start packing: stage 2, pass %s",pass) + end + local pack_normal, pack_indexed, pack_flat, pack_boolean = packers(pass) for unicode, description in next, data.descriptions do local kerns = description.kerns if kerns then - description.kerns = pack(kerns) + description.kerns = pack_normal(kerns) end local math = description.math if 
math then local kerns = math.kerns if kerns then - math.kerns = pack(kerns) + math.kerns = pack_normal(kerns) end end local anchors = description.anchors if anchors then - description.anchors = pack(anchors) + description.anchors = pack_normal(anchors) end local mlookups = description.mlookups if mlookups then for tag, mlookup in next, mlookups do - mlookups[tag] = pack(mlookup) + mlookups[tag] = pack_normal(mlookup) end end end @@ -325,9 +425,9 @@ local function packdata(data) if rules then for i=1,#rules do -- was next loop local rule = rules[i] - local r = rule.before if r then rule.before = pack(r) end - local r = rule.after if r then rule.after = pack(r) end - local r = rule.current if r then rule.current = pack(r) end + local r = rule.before if r then rule.before = pack_normal(r) end + local r = rule.after if r then rule.after = pack_normal(r) end + local r = rule.current if r then rule.current = pack_normal(r) end end end end @@ -335,7 +435,7 @@ local function packdata(data) local sequences = resources.sequences if sequences then for feature, sequence in next, sequences do - sequence.features = pack(sequence.features) + sequence.features = pack_normal(sequence.features) end end if not success(2,pass) then @@ -344,15 +444,15 @@ local function packdata(data) end for pass=1,2 do - local pack = (pass == 1 and pack_1) or pack_2 + local pack_normal, pack_indexed, pack_flat, pack_boolean = packers(pass) for unicode, description in next, data.descriptions do local slookups = description.slookups if slookups then - description.slookups = pack(slookups) + description.slookups = pack_normal(slookups) end local mlookups = description.mlookups if mlookups then - description.mlookups = pack(mlookups) + description.mlookups = pack_normal(mlookups) end end end diff --git a/tex/context/base/font-syn.lua b/tex/context/base/font-syn.lua index 9be307099..3f90da91b 100644 --- a/tex/context/base/font-syn.lua +++ b/tex/context/base/font-syn.lua @@ -8,7 +8,6 @@ if not modules then 
modules = { } end modules ['font-syn'] = { -- todo: subs in lookups requests -local utf = unicode.utf8 local next, tonumber = next, tonumber local sub, gsub, lower, match, find, lower, upper = string.sub, string.gsub, string.lower, string.match, string.find, string.lower, string.upper local find, gmatch = string.find, string.gmatch diff --git a/tex/context/base/font-vf.lua b/tex/context/base/font-vf.lua index 01d5289f8..34d74d93f 100644 --- a/tex/context/base/font-vf.lua +++ b/tex/context/base/font-vf.lua @@ -7,10 +7,13 @@ if not modules then modules = { } end modules ['font-vf'] = { } --[[ldx-- -

This is very experimental code! Not yet adapted to recent -changes. This will change.

+

This is very experimental code! Not yet adapted to recent changes. This will change.

--ldx]]-- +-- present in the backend but unspecified: +-- +-- vf.rule vf.special vf.right vf.push vf.down vf.char vf.node vf.fontid vf.pop vf.image vf.nop + local next = next local allocate = utilities.storage.allocate diff --git a/tex/context/base/l-dir.lua b/tex/context/base/l-dir.lua index 3deb660ce..0568bcfb5 100644 --- a/tex/context/base/l-dir.lua +++ b/tex/context/base/l-dir.lua @@ -8,7 +8,7 @@ if not modules then modules = { } end modules ['l-dir'] = { -- dir.expandname will be merged with cleanpath and collapsepath -local type = type +local type, select = type, select local find, gmatch, match, gsub = string.find, string.gmatch, string.match, string.gsub local concat, insert, remove = table.concat, table.insert, table.remove local lpegmatch = lpeg.match @@ -261,15 +261,15 @@ local onwindows = os.type == "windows" or find(os.getenv("PATH"),";") if onwindows then function dir.mkdirs(...) - local str, pth, t = "", "", { ... } - for i=1,#t do - local s = t[i] - if s ~= "" then - if str ~= "" then - str = str .. "/" .. s - else - str = s - end + local str, pth = "", "" + for i=1,select("#",...) do + local s = select(i,...) + if s == "" then + -- skip + elseif str == "" then + str = s + else + str = str .. "/" .. s end end local first, middle, last @@ -329,9 +329,9 @@ if onwindows then else function dir.mkdirs(...) - local str, pth, t = "", "", { ... } - for i=1,#t do - local s = t[i] + local str, pth = "", "" + for i=1,select("#",...) do + local s = select(i,...) if s and s ~= "" then -- we catch nil and false if str ~= "" then str = str .. "/" .. 
s diff --git a/tex/context/base/l-file.lua b/tex/context/base/l-file.lua index d1ec753b1..f34bed5fd 100644 --- a/tex/context/base/l-file.lua +++ b/tex/context/base/l-file.lua @@ -36,25 +36,25 @@ local suffix = period/"" * (1-period-slashes)^1 * -1 local pattern = C((noslashes^0 * slashes^1)^1) local function pathpart(name,default) - return lpegmatch(pattern,name) or default or "" + return name and lpegmatch(pattern,name) or default or "" end local pattern = (noslashes^0 * slashes)^1 * C(noslashes^1) * -1 local function basename(name) - return lpegmatch(pattern,name) or name + return name and lpegmatch(pattern,name) or name end local pattern = (noslashes^0 * slashes^1)^0 * Cs((1-suffix)^1) * suffix^0 local function nameonly(name) - return lpegmatch(pattern,name) or name + return name and lpegmatch(pattern,name) or name end local pattern = (noslashes^0 * slashes)^0 * (noperiod^1 * period)^1 * C(noperiod^1) * -1 local function suffixonly(name) - return lpegmatch(pattern,name) or "" + return name and lpegmatch(pattern,name) or "" end file.pathpart = pathpart @@ -85,7 +85,9 @@ local pattern_c = C(drive * path) * C(base * suffix) -- trick: two extra capture local pattern_d = path * rest function file.splitname(str,splitdrive) - if splitdrive then + if not str then + -- error + elseif splitdrive then return lpegmatch(pattern_a,str) -- returns drive, path, base, suffix else return lpegmatch(pattern_b,str) -- returns path, base, suffix @@ -93,34 +95,36 @@ function file.splitname(str,splitdrive) end function file.splitbase(str) - return lpegmatch(pattern_d,str) -- returns path, base+suffix + return str and lpegmatch(pattern_d,str) -- returns path, base+suffix end function file.nametotable(str,splitdrive) -- returns table - local path, drive, subpath, name, base, suffix = lpegmatch(pattern_c,str) - if splitdrive then - return { - path = path, - drive = drive, - subpath = subpath, - name = name, - base = base, - suffix = suffix, - } - else - return { - path = path, - name = 
name, - base = base, - suffix = suffix, - } + if str then + local path, drive, subpath, name, base, suffix = lpegmatch(pattern_c,str) + if splitdrive then + return { + path = path, + drive = drive, + subpath = subpath, + name = name, + base = base, + suffix = suffix, + } + else + return { + path = path, + name = name, + base = base, + suffix = suffix, + } + end end end local pattern = Cs(((period * noperiod^1 * -1)/"" + 1)^1) function file.removesuffix(name) - return lpegmatch(pattern,name) + return name and lpegmatch(pattern,name) end -- local pattern = (noslashes^0 * slashes)^0 * (noperiod^1 * period)^1 * Cp() * noperiod^1 * -1 @@ -137,8 +141,8 @@ end local suffix = period/"" * (1-period-slashes)^1 * -1 local pattern = Cs((noslashes^0 * slashes^1)^0 * ((1-suffix)^1)) * Cs(suffix) -function file.addsuffix(filename, suffix, criterium) - if not suffix or suffix == "" then +function file.addsuffix(filename,suffix,criterium) + if not filename or not suffix or suffix == "" then return filename elseif criterium == true then return filename .. "." .. suffix @@ -184,7 +188,7 @@ local suffix = period * (1-period-slashes)^1 * -1 local pattern = Cs((1-suffix)^0) function file.replacesuffix(name,suffix) - if suffix and suffix ~= "" then + if name and suffix and suffix ~= "" then return lpegmatch(pattern,name) .. "." .. suffix else return name @@ -193,10 +197,10 @@ end -- -local reslasher = lpeg.replacer(S("\\"),"/") +local reslasher = lpeg.replacer(P("\\"),"/") function file.reslash(str) - return lpegmatch(reslasher,str) + return str and lpegmatch(reslasher,str) end -- We should be able to use: @@ -212,7 +216,9 @@ end -- variant: function file.is_writable(name) - if lfs.isdir(name) then + if not name then + -- error + elseif lfs.isdir(name) then name = name .. 
"/m_t_x_t_e_s_t.tmp" local f = io.open(name,"wb") if f then @@ -240,24 +246,32 @@ end local readable = P("r") * Cc(true) function file.is_readable(name) - local a = attributes(name) - return a and lpegmatch(readable,a.permissions) or false + if name then + local a = attributes(name) + return a and lpegmatch(readable,a.permissions) or false + else + return false + end end file.isreadable = file.is_readable -- depricated file.iswritable = file.is_writable -- depricated function file.size(name) - local a = attributes(name) - return a and a.size or 0 + if name then + local a = attributes(name) + return a and a.size or 0 + else + return 0 + end end function file.splitpath(str,separator) -- string .. reslash is a bonus (we could do a direct split) - return checkedsplit(lpegmatch(reslasher,str),separator or io.pathseparator) + return str and checkedsplit(lpegmatch(reslasher,str),separator or io.pathseparator) end function file.joinpath(tab,separator) -- table - return concat(tab,separator or io.pathseparator) -- can have trailing // + return tab and concat(tab,separator or io.pathseparator) -- can have trailing // end local stripper = Cs(P(fwslash)^0/"" * reslasher) @@ -265,14 +279,23 @@ local isnetwork = fwslash * fwslash * (1-fwslash) + (1-fwslash-colon)^1 * colon local isroot = fwslash^1 * -1 local hasroot = fwslash^1 -function file.join(...) -- rather dirty +local deslasher = lpeg.replacer(S("\\/")^1,"/") + +-- If we have a network or prefix then there is a change that we end up with two +-- // in the middle ... we could prevent this if we (1) expand prefixes: and (2) +-- split and rebuild as url. Of course we could assume no network paths (which +-- makes sense) adn assume either mapped drives (windows) or mounts (unix) but +-- then we still have to deal with urls ... anyhow, multiple // are never a real +-- problem but just ugly. + +function file.join(...) local lst = { ... 
} local one = lst[1] if lpegmatch(isnetwork,one) then - local two = lpegmatch(reslasher,concat(lst,"/",2)) + local two = lpegmatch(deslasher,concat(lst,"/",2)) return one .. "/" .. two elseif lpegmatch(isroot,one) then - local two = lpegmatch(reslasher,concat(lst,"/",2)) + local two = lpegmatch(deslasher,concat(lst,"/",2)) if lpegmatch(hasroot,two) then return two else @@ -281,7 +304,7 @@ function file.join(...) -- rather dirty elseif one == "" then return lpegmatch(stripper,concat(lst,"/",2)) else - return lpegmatch(reslasher,concat(lst,"/")) + return lpegmatch(deslasher,concat(lst,"/")) end end @@ -310,6 +333,9 @@ local splitstarter = (Cs(drivespec * (bwslash/"/" + fwslash)^0) + Cc(false)) * C local absolute = fwslash function file.collapsepath(str,anchor) + if not str then + return + end if anchor and not lpegmatch(anchors,str) then str = getcurrentdir() .. "/" .. str end @@ -319,7 +345,6 @@ function file.collapsepath(str,anchor) return lpegmatch(reslasher,str) end local starter, oldelements = lpegmatch(splitstarter,str) --- inspect(oldelements) local newelements = { } local i = #oldelements while i > 0 do @@ -373,11 +398,13 @@ local whatever = P("-")^0 / "" local pattern_b = Cs(whatever * (1 - whatever * -1)^1) function file.robustname(str,strict) - str = lpegmatch(pattern_a,str) or str - if strict then - return lpegmatch(pattern_b,str) or str -- two step is cleaner (less backtracking) - else - return str + if str then + str = lpegmatch(pattern_a,str) or str + if strict then + return lpegmatch(pattern_b,str) or str -- two step is cleaner (less backtracking) + else + return str + end end end @@ -385,7 +412,9 @@ file.readdata = io.loaddata file.savedata = io.savedata function file.copy(oldname,newname) - file.savedata(newname,io.loaddata(oldname)) + if oldname and newname then + file.savedata(newname,io.loaddata(oldname)) + end end -- also rewrite previous @@ -406,11 +435,11 @@ lpeg.patterns.rootbased = rootbased -- ./name ../name /name c: :// name/name function 
file.is_qualified_path(filename) - return lpegmatch(qualified,filename) ~= nil + return filename and lpegmatch(qualified,filename) ~= nil end function file.is_rootbased_path(filename) - return lpegmatch(rootbased,filename) ~= nil + return filename and lpegmatch(rootbased,filename) ~= nil end -- function test(t) for k, v in next, t do print(v, "=>", file.splitname(v)) end end @@ -432,8 +461,10 @@ end -- for myself: function file.strip(name,dir) - local b, a = match(name,"^(.-)" .. dir .. "(.*)$") - return a ~= "" and a or name + if name then + local b, a = match(name,"^(.-)" .. dir .. "(.*)$") + return a ~= "" and a or name + end end -- local debuglist = { diff --git a/tex/context/base/l-io.lua b/tex/context/base/l-io.lua index ec628b5e0..e7bc23642 100644 --- a/tex/context/base/l-io.lua +++ b/tex/context/base/l-io.lua @@ -328,7 +328,7 @@ function io.readstring(f,n,m) f:seek("set",n) n = m end - local str = gsub(f:read(n),"%z","") + local str = gsub(f:read(n),"\000","") return str end diff --git a/tex/context/base/l-lpeg.lua b/tex/context/base/l-lpeg.lua index a5fdec765..b00d02f9f 100644 --- a/tex/context/base/l-lpeg.lua +++ b/tex/context/base/l-lpeg.lua @@ -6,9 +6,11 @@ if not modules then modules = { } end modules ['l-lpeg'] = { license = "see context related readme files" } - -- a new lpeg fails on a #(1-P(":")) test and really needs a + P(-1) +-- move utf -> l-unicode +-- move string -> l-string or keep it here + local lpeg = require("lpeg") -- tracing (only used when we encounter a problem in integration of lpeg in luatex) @@ -60,12 +62,9 @@ local byte, char, gmatch, format = string.byte, string.char, string.gmatch, stri lpeg.patterns = lpeg.patterns or { } -- so that we can share local patterns = lpeg.patterns -local P, R, S, V, Ct, C, Cs, Cc, Cp = lpeg.P, lpeg.R, lpeg.S, lpeg.V, lpeg.Ct, lpeg.C, lpeg.Cs, lpeg.Cc, lpeg.Cp +local P, R, S, V, Ct, C, Cs, Cc, Cp, Cmt = lpeg.P, lpeg.R, lpeg.S, lpeg.V, lpeg.Ct, lpeg.C, lpeg.Cs, lpeg.Cc, lpeg.Cp, lpeg.Cmt local 
lpegtype, lpegmatch = lpeg.type, lpeg.match -local utfcharacters = string.utfcharacters -local utfgmatch = unicode and unicode.utf8.gmatch - local anything = P(1) local endofstring = P(-1) local alwaysmatched = P(true) @@ -91,9 +90,12 @@ local utfbom_8 = P('\239\187\191') local utfbom = utfbom_32_be + utfbom_32_le + utfbom_16_be + utfbom_16_le + utfbom_8 -local utftype = utfbom_32_be / "utf-32-be" + utfbom_32_le / "utf-32-le" - + utfbom_16_be / "utf-16-be" + utfbom_16_le / "utf-16-le" - + utfbom_8 / "utf-8" + alwaysmatched / "unknown" +local utftype = utfbom_32_be * Cc("utf-32-be") + utfbom_32_le * Cc("utf-32-le") + + utfbom_16_be * Cc("utf-16-be") + utfbom_16_le * Cc("utf-16-le") + + utfbom_8 * Cc("utf-8") + alwaysmatched * Cc("utf-8") -- assume utf8 +local utfoffset = utfbom_32_be * Cc(4) + utfbom_32_le * Cc(4) + + utfbom_16_be * Cc(2) + utfbom_16_le * Cc(2) + + utfbom_8 * Cc(3) + Cc(0) local utf8next = R("\128\191") @@ -103,6 +105,7 @@ patterns.utf8three = R("\224\239") * utf8next * utf8next patterns.utf8four = R("\240\244") * utf8next * utf8next * utf8next patterns.utfbom = utfbom patterns.utftype = utftype +patterns.utfoffset = utfoffset local utf8char = patterns.utf8one + patterns.utf8two + patterns.utf8three + patterns.utf8four local validutf8char = utf8char^0 * endofstring * Cc(true) + Cc(false) @@ -115,6 +118,22 @@ patterns.validutf8char = validutf8char local eol = S("\n\r") local spacer = S(" \t\f\v") -- + char(0xc2, 0xa0) if we want utf (cf mail roberto) local whitespace = eol + spacer +local nonspacer = 1 - spacer +local nonwhitespace = 1 - whitespace + +patterns.eol = eol +patterns.spacer = spacer +patterns.whitespace = whitespace +patterns.nonspacer = nonspacer +patterns.nonwhitespace = nonwhitespace + +local stripper = spacer^0 * C((spacer^0 * nonspacer^1)^0) -- from example by roberto + +----- collapser = Cs(spacer^0/"" * ((spacer^1 * P(-1) / "") + (spacer^1/" ") + P(1))^0) +local collapser = Cs(spacer^0/"" * nonspacer^0 * ((spacer^0/" " * 
nonspacer^1)^0)) + +patterns.stripper = stripper +patterns.collapser = collapser patterns.digit = digit patterns.sign = sign @@ -137,13 +156,8 @@ patterns.letter = patterns.lowercase + patterns.uppercase patterns.space = space patterns.tab = P("\t") patterns.spaceortab = patterns.space + patterns.tab -patterns.eol = eol -patterns.spacer = spacer -patterns.whitespace = whitespace patterns.newline = newline patterns.emptyline = newline^1 -patterns.nonspacer = 1 - spacer -patterns.nonwhitespace = 1 - whitespace patterns.equal = P("=") patterns.comma = P(",") patterns.commaspacer = P(",") * spacer^0 @@ -156,8 +170,8 @@ patterns.squote = squote patterns.dquote = dquote patterns.nosquote = (escaped + (1-squote))^0 patterns.nodquote = (escaped + (1-dquote))^0 -patterns.unsingle = (squote/"") * patterns.nosquote * (squote/"") -patterns.undouble = (dquote/"") * patterns.nodquote * (dquote/"") +patterns.unsingle = (squote/"") * patterns.nosquote * (squote/"") -- will change to C in the middle +patterns.undouble = (dquote/"") * patterns.nodquote * (dquote/"") -- will change to C in the middle patterns.unquoted = patterns.undouble + patterns.unsingle -- more often undouble patterns.unspacer = ((patterns.spacer^1)/"")^0 @@ -165,16 +179,11 @@ patterns.singlequoted = squote * patterns.nosquote * squote patterns.doublequoted = dquote * patterns.nodquote * dquote patterns.quoted = patterns.doublequoted + patterns.singlequoted +patterns.propername = R("AZ","az","__") * R("09","AZ","az", "__")^0 * P(-1) + patterns.somecontent = (anything - newline - space)^1 -- (utf8char - newline - space)^1 patterns.beginline = #(1-newline) --- print(string.unquoted("test")) --- print(string.unquoted([["t\"est"]])) --- print(string.unquoted([["t\"est"x]])) --- print(string.unquoted("\'test\'")) --- print(string.unquoted('"test"')) --- print(string.unquoted('"test"')) - local function anywhere(pattern) --slightly adapted from website return P { P(pattern) + 1 * V(1) } end @@ -237,10 +246,10 @@ 
function string.splitup(str,separator) return lpegmatch(splitters_m[separator] or splitat(separator),str) end ---~ local p = splitat("->",false) print(lpegmatch(p,"oeps->what->more")) -- oeps what more ---~ local p = splitat("->",true) print(lpegmatch(p,"oeps->what->more")) -- oeps what->more ---~ local p = splitat("->",false) print(lpegmatch(p,"oeps")) -- oeps ---~ local p = splitat("->",true) print(lpegmatch(p,"oeps")) -- oeps +-- local p = splitat("->",false) print(lpegmatch(p,"oeps->what->more")) -- oeps what more +-- local p = splitat("->",true) print(lpegmatch(p,"oeps->what->more")) -- oeps what->more +-- local p = splitat("->",false) print(lpegmatch(p,"oeps")) -- oeps +-- local p = splitat("->",true) print(lpegmatch(p,"oeps")) -- oeps local cache = { } @@ -273,12 +282,6 @@ local content = (empty + nonempty)^1 patterns.textline = content ---~ local linesplitter = Ct(content^0) ---~ ---~ function string.splitlines(str) ---~ return lpegmatch(linesplitter,str) ---~ end - local linesplitter = tsplitat(newline) patterns.linesplitter = linesplitter @@ -287,66 +290,7 @@ function string.splitlines(str) return lpegmatch(linesplitter,str) end -local utflinesplitter = utfbom^-1 * tsplitat(newline) - -patterns.utflinesplitter = utflinesplitter - -function string.utfsplitlines(str) - return lpegmatch(utflinesplitter,str or "") -end - -local utfcharsplitter_ows = utfbom^-1 * Ct(C(utf8char)^0) -local utfcharsplitter_iws = utfbom^-1 * Ct((whitespace^1 + C(utf8char))^0) - -function string.utfsplit(str,ignorewhitespace) -- new - if ignorewhitespace then - return lpegmatch(utfcharsplitter_iws,str or "") - else - return lpegmatch(utfcharsplitter_ows,str or "") - end -end - --- inspect(string.utfsplit("a b c d")) --- inspect(string.utfsplit("a b c d",true)) - --- -- alternative 1: 0.77 --- --- local utfcharcounter = utfbom^-1 * Cs((utf8char/'!')^0) --- --- function string.utflength(str) --- return #lpegmatch(utfcharcounter,str or "") --- end --- --- -- alternative 2: 1.70 --- --- 
local n = 0 --- --- local utfcharcounter = utfbom^-1 * (utf8char/function() n = n + 1 end)^0 -- slow --- --- function string.utflength(str) --- n = 0 --- lpegmatch(utfcharcounter,str or "") --- return n --- end --- --- -- alternative 3: 0.24 (native unicode.utf8.len: 0.047) - -local n = 0 - -local utfcharcounter = utfbom^-1 * Cs ( ( - Cp() * (lpeg.patterns.utf8one )^1 * Cp() / function(f,t) n = n + t - f end - + Cp() * (lpeg.patterns.utf8two )^1 * Cp() / function(f,t) n = n + (t - f)/2 end - + Cp() * (lpeg.patterns.utf8three)^1 * Cp() / function(f,t) n = n + (t - f)/3 end - + Cp() * (lpeg.patterns.utf8four )^1 * Cp() / function(f,t) n = n + (t - f)/4 end -)^0 ) - -function string.utflength(str) - n = 0 - lpegmatch(utfcharcounter,str or "") - return n -end - ---~ lpeg.splitters = cache -- no longer public +-- lpeg.splitters = cache -- no longer public local cache = { } @@ -372,7 +316,7 @@ function string.checkedsplit(str,separator) return lpegmatch(c,str) end ---~ from roberto's site: +-- from roberto's site: local function f2(s) local c1, c2 = byte(s,1,2) return c1 * 64 + c2 - 12416 end local function f3(s) local c1, c2, c3 = byte(s,1,3) return (c1 * 64 + c2) * 64 + c3 - 925824 end @@ -430,8 +374,11 @@ end -- Just for fun I looked at the used bytecode and -- p = (p and p + pp) or pp gets one more (testset). 
-function lpeg.replacer(one,two,makefunction) +-- todo: cache when string + +function lpeg.replacer(one,two,makefunction,isutf) -- in principle we should sort the keys local pattern + local u = isutf and utf8char or 1 if type(one) == "table" then local no = #one local p = P(false) @@ -439,24 +386,21 @@ function lpeg.replacer(one,two,makefunction) for k, v in next, one do p = p + P(k) / v end - pattern = Cs((p + 1)^0) + pattern = Cs((p + u)^0) elseif no == 1 then local o = one[1] one, two = P(o[1]), o[2] -- pattern = Cs(((1-one)^1 + one/two)^0) - pattern = Cs((one/two + 1)^0) + pattern = Cs((one/two + u)^0) else for i=1,no do local o = one[i] p = p + P(o[1]) / o[2] end - pattern = Cs((p + 1)^0) + pattern = Cs((p + u)^0) end else - one = P(one) - two = two or "" - -- pattern = Cs(((1-one)^1 + one/two)^0) - pattern = Cs((one/two +1)^0) + pattern = Cs((P(one)/(two or "") + u)^0) end if makefunction then return function(str) @@ -470,14 +414,20 @@ end function lpeg.finder(lst,makefunction) local pattern if type(lst) == "table" then - local p = P(false) - for i=1,#lst do - p = p + P(lst[i]) + pattern = P(false) + if #lst == 0 then + for k, v in next, lst do + pattern = pattern + P(k) -- ignore key, so we can use a replacer table + end + else + for i=1,#lst do + pattern = pattern + P(lst[i]) + end end - pattern = (p + 1)^0 else - pattern = (P(lst) + 1)^0 + pattern = P(lst) end + pattern = (1-pattern)^0 * pattern if makefunction then return function(str) return lpegmatch(pattern,str) @@ -518,21 +468,21 @@ function lpeg.balancer(left,right) return P { left * ((1 - left - right) + V(1))^0 * right } end ---~ print(1,lpegmatch(lpeg.firstofsplit(":"),"bc:de")) ---~ print(2,lpegmatch(lpeg.firstofsplit(":"),":de")) -- empty ---~ print(3,lpegmatch(lpeg.firstofsplit(":"),"bc")) ---~ print(4,lpegmatch(lpeg.secondofsplit(":"),"bc:de")) ---~ print(5,lpegmatch(lpeg.secondofsplit(":"),"bc:")) -- empty ---~ print(6,lpegmatch(lpeg.secondofsplit(":",""),"bc")) ---~ 
print(7,lpegmatch(lpeg.secondofsplit(":"),"bc")) ---~ print(9,lpegmatch(lpeg.secondofsplit(":","123"),"bc")) - ---~ -- slower: ---~ ---~ function lpeg.counter(pattern) ---~ local n, pattern = 0, (lpeg.P(pattern)/function() n = n + 1 end + lpeg.anything)^0 ---~ return function(str) n = 0 ; lpegmatch(pattern,str) ; return n end ---~ end +-- print(1,lpegmatch(lpeg.firstofsplit(":"),"bc:de")) +-- print(2,lpegmatch(lpeg.firstofsplit(":"),":de")) -- empty +-- print(3,lpegmatch(lpeg.firstofsplit(":"),"bc")) +-- print(4,lpegmatch(lpeg.secondofsplit(":"),"bc:de")) +-- print(5,lpegmatch(lpeg.secondofsplit(":"),"bc:")) -- empty +-- print(6,lpegmatch(lpeg.secondofsplit(":",""),"bc")) +-- print(7,lpegmatch(lpeg.secondofsplit(":"),"bc")) +-- print(9,lpegmatch(lpeg.secondofsplit(":","123"),"bc")) + +-- -- slower: +-- +-- function lpeg.counter(pattern) +-- local n, pattern = 0, (lpeg.P(pattern)/function() n = n + 1 end + lpeg.anything)^0 +-- return function(str) n = 0 ; lpegmatch(pattern,str) ; return n end +-- end local nany = utf8char/"" @@ -543,65 +493,12 @@ function lpeg.counter(pattern) end end -if utfgmatch then - - function lpeg.count(str,what) -- replaces string.count - if type(what) == "string" then - local n = 0 - for _ in utfgmatch(str,what) do - n = n + 1 - end - return n - else -- 4 times slower but still faster than / function - return #lpegmatch(Cs((P(what)/" " + nany)^0),str) - end - end - -else - - local cache = { } - - function lpeg.count(str,what) -- replaces string.count - if type(what) == "string" then - local p = cache[what] - if not p then - p = Cs((P(what)/" " + nany)^0) - cache[p] = p - end - return #lpegmatch(p,str) - else -- 4 times slower but still faster than / function - return #lpegmatch(Cs((P(what)/" " + nany)^0),str) - end - end - -end - -local patterns_escapes = { -- also defines in l-string - ["%"] = "%%", - ["."] = "%.", - ["+"] = "%+", ["-"] = "%-", ["*"] = "%*", - ["["] = "%[", ["]"] = "%]", - ["("] = "%)", [")"] = "%)", - -- ["{"] = "%{", 
["}"] = "%}" - -- ["^"] = "%^", ["$"] = "%$", -} - -local simple_escapes = { -- also defines in l-string - ["-"] = "%-", - ["."] = "%.", - ["?"] = ".", - ["*"] = ".*", -} - -local p = Cs((S("-.+*%()[]") / patterns_escapes + anything)^0) -local s = Cs((S("-.+*%()[]") / simple_escapes + anything)^0) - -function string.escapedpattern(str,simple) - return lpegmatch(simple and s or p,str) -end - -- utf extensies +local utfcharacters = utf and utf.characters or string.utfcharacters +local utfgmatch = unicode and unicode.utf8.gmatch +local utfchar = utf and utf.char or (unicode and unicode.utf8 and unicode.utf8.char) + lpeg.UP = lpeg.P if utfcharacters then @@ -640,8 +537,6 @@ end local range = utf8byte * utf8byte + Cc(false) -- utf8byte is already a capture -local utfchar = unicode and unicode.utf8 and unicode.utf8.char - function lpeg.UR(str,more) local first, last if type(str) == "number" then @@ -672,16 +567,16 @@ end -- print(lpeg.match(lpeg.Cs((C(lpeg.UR("αω"))/{ ["χ"] = "OEPS" })^0),"αωχαω")) ---~ lpeg.print(lpeg.R("ab","cd","gh")) ---~ lpeg.print(lpeg.P("a","b","c")) ---~ lpeg.print(lpeg.S("a","b","c")) +-- lpeg.print(lpeg.R("ab","cd","gh")) +-- lpeg.print(lpeg.P("a","b","c")) +-- lpeg.print(lpeg.S("a","b","c")) ---~ print(lpeg.count("äáàa",lpeg.P("á") + lpeg.P("à"))) ---~ print(lpeg.count("äáàa",lpeg.UP("áà"))) ---~ print(lpeg.count("äáàa",lpeg.US("àá"))) ---~ print(lpeg.count("äáàa",lpeg.UR("aá"))) ---~ print(lpeg.count("äáàa",lpeg.UR("àá"))) ---~ print(lpeg.count("äáàa",lpeg.UR(0x0000,0xFFFF))) +-- print(lpeg.count("äáàa",lpeg.P("á") + lpeg.P("à"))) +-- print(lpeg.count("äáàa",lpeg.UP("áà"))) +-- print(lpeg.count("äáàa",lpeg.US("àá"))) +-- print(lpeg.count("äáàa",lpeg.UR("aá"))) +-- print(lpeg.count("äáàa",lpeg.UR("àá"))) +-- print(lpeg.count("äáàa",lpeg.UR(0x0000,0xFFFF))) function lpeg.is_lpeg(p) return p and lpegtype(p) == "pattern" @@ -703,12 +598,30 @@ end -- have the longest keyword first, so 'aaa' comes beforte 'aa' which is why we -- loop back from the 
end cq. prepend. -local sort, fastcopy, sortedkeys = table.sort, table.fastcopy, table.sortedkeys -- dependency! +local sort = table.sort + +local function copyindexed(old) + local new = { } + for i=1,#old do + new[i] = old + end + return new +end + +local function sortedkeys(tab) + local keys, s = { }, 0 + for key,_ in next, tab do + s = s + 1 + keys[s] = key + end + sort(keys) + return keys +end function lpeg.append(list,pp,delayed,checked) local p = pp if #list > 0 then - local keys = fastcopy(list) + local keys = copyindexed(list) sort(keys) for i=#keys,1,-1 do local k = keys[i] @@ -805,8 +718,10 @@ end local function make(t) local p --- for k, v in next, t do - for k, v in table.sortedhash(t) do + local keys = sortedkeys(t) + for i=1,#keys do + local k = keys[i] + local v = t[k] if not p then if next(v) then p = P(k) * make(v) @@ -824,7 +739,7 @@ local function make(t) return p end -function lpeg.utfchartabletopattern(list) +function lpeg.utfchartabletopattern(list) -- goes to util-lpg local tree = { } for i=1,#list do local t = tree @@ -856,20 +771,8 @@ end -- utfchar(0x205F), -- math thinspace -- } ) --- handy from within tex: - -local lpegmatch = lpeg.match - -local replacer = lpeg.replacer("@","%%") -- Watch the escaped % in lpeg! - -function string.tformat(fmt,...) - return format(lpegmatch(replacer,fmt),...) 
-end - --- strips leading and trailing spaces and collapsed all other spaces - -local pattern = Cs(whitespace^0/"" * ((whitespace^1 * P(-1) / "") + (whitespace^1/" ") + P(1))^0) +-- a few handy ones: +-- +-- faster than find(str,"[\n\r]") when match and # > 7 and always faster when # > 3 -function string.collapsespaces(str) - return lpegmatch(pattern,str) -end +patterns.containseol = lpeg.finder(eol) -- (1-eol)^0 * eol diff --git a/tex/context/base/l-lua.lua b/tex/context/base/l-lua.lua new file mode 100644 index 000000000..8ac351417 --- /dev/null +++ b/tex/context/base/l-lua.lua @@ -0,0 +1,107 @@ +if not modules then modules = { } end modules ['l-lua'] = { + version = 1.001, + comment = "companion to luat-lib.mkiv", + author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", + copyright = "PRAGMA ADE / ConTeXt Development Team", + license = "see context related readme files" +} + +-- compatibility hacks ... try to avoid usage + +local major, minor = string.match(_VERSION,"^[^%d]+(%d+)%.(%d+).*$") + +_MAJORVERSION = tonumber(major) or 5 +_MINORVERSION = tonumber(minor) or 1 + +-- basics: + +if loadstring then + + local loadnormal = load + + function load(first,...) + if type(first) == "string" then + return loadstring(first,...) + else + return loadnormal(first,...) + end + end + +else + + loadstring = load + +end + +-- table: + +-- Starting with version 5.2 Lua no longer provide ipairs, which makes +-- sense. As we already used the for loop and # in most places the +-- impact on ConTeXt was not that large; the remaining ipairs already +-- have been replaced. In a similar fashion we also hardly used pairs. +-- +-- Hm, actually ipairs was retained, but we no longer use it anyway. +-- +-- Just in case, we provide the fallbacks as discussed in Programming +-- in Lua (http://www.lua.org/pil/7.3.html): + +if not ipairs then + + -- for k, v in ipairs(t) do ... end + -- for k=1,#t do local v = t[k] ... 
end + + local function iterate(a,i) + i = i + 1 + local v = a[i] + if v ~= nil then + return i, v --, nil + end + end + + function ipairs(a) + return iterate, a, 0 + end + +end + +if not pairs then + + -- for k, v in pairs(t) do ... end + -- for k, v in next, t do ... end + + function pairs(t) + return next, t -- , nil + end + +end + +-- The unpack function has been moved to the table table, and for compatiility +-- reasons we provide both now. + +if not table.unpack then + + table.unpack = _G.unpack + +elseif not unpack then + + _G.unpack = table.unpack + +end + +-- package: + +-- if not package.seachers then +-- +-- package.searchers = package.loaders -- 5.2 +-- +-- elseif not package.loaders then +-- +-- package.loaders = package.searchers +-- +-- end + +if not package.loaders then -- brr, searchers is a special "loadlib function" userdata type + + package.loaders = package.searchers + +end diff --git a/tex/context/base/l-number.lua b/tex/context/base/l-number.lua index a4dbe3bdf..f974f2582 100644 --- a/tex/context/base/l-number.lua +++ b/tex/context/base/l-number.lua @@ -16,10 +16,129 @@ local lpegmatch = lpeg.match number = number or { } local number = number --- a,b,c,d,e,f = number.toset(100101) +if bit32 then + + local btest, bor = bit32.btest, bit32.bor + + function number.bit(p) + return 2 ^ (p - 1) -- 1-based indexing + end + + number.hasbit = btest + number.setbit = bor + + function number.setbit(x,p) + return btest(x,p) and x or x + p + end + + function number.clearbit(x,p) + return btest(x,p) and x - p or x + end + +else + + -- http://ricilake.blogspot.com/2007/10/iterating-bits-in-lua.html + + function number.bit(p) + return 2 ^ (p - 1) -- 1-based indexing + end + + function number.hasbit(x, p) -- typical call: if hasbit(x, bit(3)) then ... 
+ return x % (p + p) >= p + end + + function number.setbit(x, p) + return (x % (p + p) >= p) and x or x + p + end + + function number.clearbit(x, p) + return (x % (p + p) >= p) and x - p or x + end -function number.toset(n) - return match(tostring(n),"(.?)(.?)(.?)(.?)(.?)(.?)(.?)(.?)") +end + +-- print(number.tobitstring(8)) +-- print(number.tobitstring(14)) +-- print(number.tobitstring(66)) +-- print(number.tobitstring(0x00)) +-- print(number.tobitstring(0xFF)) +-- print(number.tobitstring(46260767936,4)) + +if bit32 then + + local bextract = bit32.extract + + local t = { + "0", "0", "0", "0", "0", "0", "0", "0", + "0", "0", "0", "0", "0", "0", "0", "0", + "0", "0", "0", "0", "0", "0", "0", "0", + "0", "0", "0", "0", "0", "0", "0", "0", + } + + function number.tobitstring(b,m) + -- if really needed we can speed this one up + -- because small numbers need less extraction + local n = 32 + for i=0,31 do + local v = bextract(b,i) + local k = 32 - i + if v == 1 then + n = k + t[k] = "1" + else + t[k] = "0" + end + end + if m then + m = 33 - m * 8 + if m < 1 then + m = 1 + end + return concat(t,"",m) + elseif n < 8 then + return concat(t) + elseif n < 16 then + return concat(t,"",9) + elseif n < 24 then + return concat(t,"",17) + else + return concat(t,"",25) + end + end + +else + + function number.tobitstring(n,m) + if n > 0 then + local t = { } + while n > 0 do + insert(t,1,n % 2 > 0 and 1 or 0) + n = floor(n/2) + end + local nn = 8 - #t % 8 + if nn > 0 and nn < 8 then + for i=1,nn do + insert(t,1,0) + end + end + if m then + m = m * 8 - #t + if m > 0 then + insert(t,1,rep("0",m)) + end + end + return concat(t) + elseif m then + rep("00000000",m) + else + return "00000000" + end + end + +end + +function number.valid(str,default) + return tonumber(str) or default or nil end function number.toevenhex(n) @@ -31,104 +150,57 @@ function number.toevenhex(n) end end --- the lpeg way is slower on 8 digits, but faster on 4 digits, some 7.5% --- on +-- a,b,c,d,e,f = 
number.toset(100101) +-- +-- function number.toset(n) +-- return match(tostring(n),"(.?)(.?)(.?)(.?)(.?)(.?)(.?)(.?)") +-- end +-- +-- -- the lpeg way is slower on 8 digits, but faster on 4 digits, some 7.5% +-- -- on -- -- for i=1,1000000 do -- local a,b,c,d,e,f,g,h = number.toset(12345678) -- local a,b,c,d = number.toset(1234) -- local a,b,c = number.toset(123) +-- local a,b,c = number.toset("123") -- end --- --- of course dedicated "(.)(.)(.)(.)" matches are even faster -local one = lpeg.C(1-lpeg.S(''))^1 +local one = lpeg.C(1-lpeg.S('')/tonumber)^1 function number.toset(n) return lpegmatch(one,tostring(n)) end -function number.bits(n,zero) - local t, i = { }, (zero and 0) or 1 - while n > 0 do +-- function number.bits(n,zero) +-- local t, i = { }, (zero and 0) or 1 +-- while n > 0 do +-- local m = n % 2 +-- if m > 0 then +-- insert(t,1,i) +-- end +-- n = floor(n/2) +-- i = i + 1 +-- end +-- return t +-- end +-- +-- -- a bit faster + +local function bits(n,i,...) + if n > 0 then local m = n % 2 + local n = floor(n/2) if m > 0 then - insert(t,1,i) - end - n = floor(n/2) - i = i + 1 - end - return t -end - ---~ http://ricilake.blogspot.com/2007/10/iterating-bits-in-lua.html - -function number.bit(p) - return 2 ^ (p - 1) -- 1-based indexing -end - -function number.hasbit(x, p) -- typical call: if hasbit(x, bit(3)) then ... - return x % (p + p) >= p -end - -function number.setbit(x, p) - return (x % (p + p) >= p) and x or x + p -end - -function number.clearbit(x, p) - return (x % (p + p) >= p) and x - p or x -end - ---~ function number.tobitstring(n) ---~ if n == 0 then ---~ return "0" ---~ else ---~ local t = { } ---~ while n > 0 do ---~ insert(t,1,n % 2 > 0 and 1 or 0) ---~ n = floor(n/2) ---~ end ---~ return concat(t) ---~ end ---~ end - -function number.tobitstring(n,m) - if n == 0 then - if m then - rep("00000000",m) + return bits(n, i+1, i, ...) else - return "00000000" + return bits(n, i+1, ...) 
end else - local t = { } - while n > 0 do - insert(t,1,n % 2 > 0 and 1 or 0) - n = floor(n/2) - end - local nn = 8 - #t % 8 - if nn > 0 and nn < 8 then - for i=1,nn do - insert(t,1,0) - end - end - if m then - m = m * 8 - #t - if m > 0 then - insert(t,1,rep("0",m)) - end - end - return concat(t) + return ... end end ---~ print(number.tobitstring(8)) ---~ print(number.tobitstring(14)) ---~ print(number.tobitstring(66)) ---~ print(number.tobitstring(0x00)) ---~ print(number.tobitstring(0xFF)) ---~ print(number.tobitstring(46260767936,8)) ---~ print(#number.tobitstring(46260767936,6)) - -function number.valid(str,default) - return tonumber(str) or default or nil +function number.bits(n) + return { bits(n,1) } end diff --git a/tex/context/base/l-string.lua b/tex/context/base/l-string.lua index 857acb019..597ce3462 100644 --- a/tex/context/base/l-string.lua +++ b/tex/context/base/l-string.lua @@ -7,40 +7,46 @@ if not modules then modules = { } end modules ['l-string'] = { } local string = string -local sub, gsub, find, match, gmatch, format, char, byte, rep, lower = string.sub, string.gsub, string.find, string.match, string.gmatch, string.format, string.char, string.byte, string.rep, string.lower -local lpegmatch, S, C, Ct = lpeg.match, lpeg.S, lpeg.C, lpeg.Ct - --- some functions may disappear as they are not used anywhere - -if not string.split then - - -- this will be overloaded by a faster lpeg variant - - function string.split(str,pattern) - local t = { } - if #str > 0 then - local n = 1 - for s in gmatch(str..pattern,"(.-)"..pattern) do - t[n] = s - n = n + 1 - end - end - return t - end - -end +local sub, gmatch, format, char, byte, rep, lower = string.sub, string.gmatch, string.format, string.char, string.byte, string.rep, string.lower +local lpegmatch, patterns = lpeg.match, lpeg.patterns +local P, S, C, Ct, Cc, Cs = lpeg.P, lpeg.S, lpeg.C, lpeg.Ct, lpeg.Cc, lpeg.Cs + +-- Some functions are already defined in l-lpeg and maybe some from here will +-- move there 
(unless we also expose caches). + +-- if not string.split then +-- +-- function string.split(str,pattern) +-- local t = { } +-- if #str > 0 then +-- local n = 1 +-- for s in gmatch(str..pattern,"(.-)"..pattern) do +-- t[n] = s +-- n = n + 1 +-- end +-- end +-- return t +-- end +-- +-- end + +-- function string.unquoted(str) +-- return (gsub(str,"^([\"\'])(.*)%1$","%2")) -- interesting pattern +-- end + +local unquoted = patterns.squote * C(patterns.nosquote) * patterns.squote + + patterns.dquote * C(patterns.nodquote) * patterns.dquote function string.unquoted(str) - return (gsub(str,"^([\"\'])(.*)%1$","%2")) + return lpegmatch(unquoted,str) or str end ---~ function stringunquoted(str) ---~ if find(str,"^[\'\"]") then ---~ return sub(str,2,-2) ---~ else ---~ return str ---~ end ---~ end +-- print(string.unquoted("test")) +-- print(string.unquoted([["t\"est"]])) +-- print(string.unquoted([["t\"est"x]])) +-- print(string.unquoted("\'test\'")) +-- print(string.unquoted('"test"')) +-- print(string.unquoted('"test"')) function string.quoted(str) return format("%q",str) -- always " @@ -63,65 +69,112 @@ function string.limit(str,n,sentinel) -- not utf proof end end -local space = S(" \t\v\n") -local nospace = 1 - space -local stripper = space^0 * C((space^0 * nospace^1)^0) -- roberto's code +local stripper = patterns.stripper +local collapser = patterns.collapser function string.strip(str) return lpegmatch(stripper,str) or "" end +function string.collapsespaces(str) + return lpegmatch(collapser,str) or "" +end + +-- function string.is_empty(str) +-- return not find(str,"%S") +-- end + +local pattern = P(" ")^0 * P(-1) + function string.is_empty(str) - return not find(str,"%S") + if str == "" then + return true + else + return lpegmatch(pattern,str) and true or false + end end -local patterns_escapes = { - ["%"] = "%%", - ["."] = "%.", - ["+"] = "%+", ["-"] = "%-", ["*"] = "%*", - ["["] = "%[", ["]"] = "%]", - ["("] = "%(", [")"] = "%)", - -- ["{"] = "%{", ["}"] = "%}" - 
-- ["^"] = "%^", ["$"] = "%$", -} -local simple_escapes = { - ["-"] = "%-", - ["."] = "%.", - ["?"] = ".", - ["*"] = ".*", -} +-- if not string.escapedpattern then +-- +-- local patterns_escapes = { +-- ["%"] = "%%", +-- ["."] = "%.", +-- ["+"] = "%+", ["-"] = "%-", ["*"] = "%*", +-- ["["] = "%[", ["]"] = "%]", +-- ["("] = "%(", [")"] = "%)", +-- -- ["{"] = "%{", ["}"] = "%}" +-- -- ["^"] = "%^", ["$"] = "%$", +-- } +-- +-- local simple_escapes = { +-- ["-"] = "%-", +-- ["."] = "%.", +-- ["?"] = ".", +-- ["*"] = ".*", +-- } +-- +-- function string.escapedpattern(str,simple) +-- return (gsub(str,".",simple and simple_escapes or patterns_escapes)) +-- end +-- +-- function string.topattern(str,lowercase,strict) +-- if str == "" then +-- return ".*" +-- else +-- str = gsub(str,".",simple_escapes) +-- if lowercase then +-- str = lower(str) +-- end +-- if strict then +-- return "^" .. str .. "$" +-- else +-- return str +-- end +-- end +-- end +-- +-- end + +--- needs checking + +local anything = patterns.anything +local allescapes = Cc("%") * S(".-+%?()[]*") -- also {} and ^$ ? +local someescapes = Cc("%") * S(".-+%()[]") -- also {} and ^$ ? +local matchescapes = Cc(".") * S("*?") -- wildcard and single match + +local pattern_a = Cs ( ( allescapes + anything )^0 ) +local pattern_b = Cs ( ( someescapes + matchescapes + anything )^0 ) +local pattern_c = Cs ( Cc("^") * ( someescapes + matchescapes + anything )^0 * Cc("$") ) function string.escapedpattern(str,simple) - return (gsub(str,".",simple and simple_escapes or patterns_escapes)) + return lpegmatch(simple and pattern_b or pattern_a,str) end function string.topattern(str,lowercase,strict) if str == "" then return ".*" + elseif strict then + str = lpegmatch(pattern_c,str) else - str = gsub(str,".",simple_escapes) - if lowercase then - str = lower(str) - end - if strict then - return "^" .. str .. 
"$" - else - return str - end + str = lpegmatch(pattern_b,str) + end + if lowercase then + return lower(str) + else + return str end end +-- print(string.escapedpattern("12+34*.tex",false)) +-- print(string.escapedpattern("12+34*.tex",true)) +-- print(string.topattern ("12+34*.tex",false,false)) +-- print(string.topattern ("12+34*.tex",false,true)) function string.valid(str,default) return (type(str) == "string" and str ~= "" and str) or default or nil end --- obsolete names: - -string.quote = string.quoted -string.unquote = string.unquoted - -- handy fallback string.itself = function(s) return s end @@ -133,3 +186,16 @@ local pattern = Ct(C(1)^0) -- string and not utf ! function string.totable(str) return lpegmatch(pattern,str) end + +-- handy from within tex: + +local replacer = lpeg.replacer("@","%%") -- Watch the escaped % in lpeg! + +function string.tformat(fmt,...) + return format(lpegmatch(replacer,fmt),...) +end + +-- obsolete names: + +string.quote = string.quoted +string.unquote = string.unquoted diff --git a/tex/context/base/l-table.lua b/tex/context/base/l-table.lua index 62e0cae14..b668281a8 100644 --- a/tex/context/base/l-table.lua +++ b/tex/context/base/l-table.lua @@ -6,68 +6,23 @@ if not modules then modules = { } end modules ['l-table'] = { license = "see context related readme files" } -local type, next, tostring, tonumber, ipairs = type, next, tostring, tonumber, ipairs +local type, next, tostring, tonumber, ipairs, select = type, next, tostring, tonumber, ipairs, select local table, string = table, string local concat, sort, insert, remove = table.concat, table.sort, table.insert, table.remove -local format, find, gsub, lower, dump, match = string.format, string.find, string.gsub, string.lower, string.dump, string.match +local format, lower, dump = string.format, string.lower, string.dump local getmetatable, setmetatable = getmetatable, setmetatable local getinfo = debug.getinfo - --- Starting with version 5.2 Lua no longer provide ipairs, 
which makes --- sense. As we already used the for loop and # in most places the --- impact on ConTeXt was not that large; the remaining ipairs already --- have been replaced. In a similar fashion we also hardly used pairs. --- --- Hm, actually ipairs was retained, but we no longer use it anyway. --- --- Just in case, we provide the fallbacks as discussed in Programming --- in Lua (http://www.lua.org/pil/7.3.html): - -if not ipairs then - - -- for k, v in ipairs(t) do ... end - -- for k=1,#t do local v = t[k] ... end - - local function iterate(a,i) - i = i + 1 - local v = a[i] - if v ~= nil then - return i, v --, nil - end - end - - function ipairs(a) - return iterate, a, 0 - end - -end - -if not pairs then - - -- for k, v in pairs(t) do ... end - -- for k, v in next, t do ... end - - function pairs(t) - return next, t -- , nil - end - -end - --- Also, unpack has been moved to the table table, and for compatiility --- reasons we provide both now. - -if not table.unpack then - table.unpack = _G.unpack -elseif not unpack then - _G.unpack = table.unpack -end +local lpegmatch, patterns = lpeg.match, lpeg.patterns +local floor = math.floor -- extra functions, some might go (when not used) +local stripper = patterns.stripper + function table.strip(tab) local lst, l = { }, 0 for i=1,#tab do - local s = gsub(tab[i],"^%s*(.-)%s*$","%1") + local s = lpegmatch(stripper,tab[i]) or "" if s == "" then -- skip this one else @@ -176,7 +131,7 @@ local function sortedhash(t) end table.sortedhash = sortedhash -table.sortedpairs = sortedhash +table.sortedpairs = sortedhash -- obsolete function table.append(t,list) local n = #t @@ -200,31 +155,63 @@ function table.prepend(t, list) return t end +-- function table.merge(t, ...) -- first one is target +-- t = t or { } +-- local lst = { ... } +-- for i=1,#lst do +-- for k, v in next, lst[i] do +-- t[k] = v +-- end +-- end +-- return t +-- end + function table.merge(t, ...) -- first one is target t = t or { } - local lst = { ... 
} - for i=1,#lst do - for k, v in next, lst[i] do + for i=1,select("#",...) do + for k, v in next, (select(i,...)) do t[k] = v end end return t end +-- function table.merged(...) +-- local tmp, lst = { }, { ... } +-- for i=1,#lst do +-- for k, v in next, lst[i] do +-- tmp[k] = v +-- end +-- end +-- return tmp +-- end + function table.merged(...) - local tmp, lst = { }, { ... } - for i=1,#lst do - for k, v in next, lst[i] do - tmp[k] = v + local t = { } + for i=1,select("#",...) do + for k, v in next, (select(i,...)) do + t[k] = v end end - return tmp + return t end +-- function table.imerge(t, ...) +-- local lst, nt = { ... }, #t +-- for i=1,#lst do +-- local nst = lst[i] +-- for j=1,#nst do +-- nt = nt + 1 +-- t[nt] = nst[j] +-- end +-- end +-- return t +-- end + function table.imerge(t, ...) - local lst, nt = { ... }, #t - for i=1,#lst do - local nst = lst[i] + local nt = #t + for i=1,select("#",...) do + local nst = select(i,...) for j=1,#nst do nt = nt + 1 t[nt] = nst[j] @@ -233,10 +220,22 @@ function table.imerge(t, ...) return t end +-- function table.imerged(...) +-- local tmp, ntmp, lst = { }, 0, {...} +-- for i=1,#lst do +-- local nst = lst[i] +-- for j=1,#nst do +-- ntmp = ntmp + 1 +-- tmp[ntmp] = nst[j] +-- end +-- end +-- return tmp +-- end + function table.imerged(...) - local tmp, ntmp, lst = { }, 0, {...} - for i=1,#lst do - local nst = lst[i] + local tmp, ntmp = { }, 0 + for i=1,select("#",...) do + local nst = select(i,...) 
for j=1,#nst do ntmp = ntmp + 1 tmp[ntmp] = nst[j] @@ -248,7 +247,7 @@ end local function fastcopy(old,metatabletoo) -- fast one if old then local new = { } - for k,v in next, old do + for k, v in next, old do if type(v) == "table" then new[k] = fastcopy(v,metatabletoo) -- was just table.copy else @@ -302,7 +301,7 @@ end table.fastcopy = fastcopy table.copy = copy -function table.derive(parent) +function table.derive(parent) -- for the moment not public local child = { } if parent then setmetatable(child,{ __index = parent }) @@ -383,6 +382,13 @@ end -- problem: there no good number_to_string converter with the best resolution +-- probably using .. is faster than format +-- maybe split in a few cases (yes/no hexify) + +-- todo: %g faster on numbers than %s + +local propername = patterns.propername -- was find(name,"^%a[%w%_]*$") + local function dummy() end local function do_serialize(root,name,depth,level,indexed) @@ -392,14 +398,14 @@ local function do_serialize(root,name,depth,level,indexed) handle(format("%s{",depth)) else local tn = type(name) - if tn == "number" then -- or find(k,"^%d+$") then + if tn == "number" then if hexify then handle(format("%s[0x%04X]={",depth,name)) else handle(format("%s[%s]={",depth,name)) end elseif tn == "string" then - if noquotes and not reserved[name] and find(name,"^%a[%w%_]*$") then + if noquotes and not reserved[name] and lpegmatch(propername,name) then handle(format("%s%s={",depth,name)) else handle(format("%s[%q]={",depth,name)) @@ -425,7 +431,6 @@ local function do_serialize(root,name,depth,level,indexed) if compact then last = #root for k=1,last do --- if not root[k] then if root[k] == nil then last = k - 1 break @@ -473,7 +478,7 @@ local function do_serialize(root,name,depth,level,indexed) handle(format("%s %s,",depth,tostring(v))) elseif t == "function" then if functions then - handle(format('%s loadstring(%q),',depth,dump(v))) + handle(format('%s load(%q),',depth,dump(v))) else handle(format('%s "function",',depth)) 
end @@ -485,7 +490,7 @@ local function do_serialize(root,name,depth,level,indexed) handle(format("%s __p__=nil,",depth)) end elseif t == "number" then - if tk == "number" then -- or find(k,"^%d+$") then + if tk == "number" then if hexify then handle(format("%s [0x%04X]=0x%04X,",depth,k,v)) else @@ -497,7 +502,7 @@ local function do_serialize(root,name,depth,level,indexed) else handle(format("%s [%s]=%s,",depth,tostring(k),v)) -- %.99g end - elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then + elseif noquotes and not reserved[k] and lpegmatch(propername,k) then if hexify then handle(format("%s %s=0x%04X,",depth,k,v)) else @@ -512,7 +517,7 @@ local function do_serialize(root,name,depth,level,indexed) end elseif t == "string" then if reduce and tonumber(v) then - if tk == "number" then -- or find(k,"^%d+$") then + if tk == "number" then if hexify then handle(format("%s [0x%04X]=%s,",depth,k,v)) else @@ -520,13 +525,13 @@ local function do_serialize(root,name,depth,level,indexed) end elseif tk == "boolean" then handle(format("%s [%s]=%s,",depth,tostring(k),v)) - elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then + elseif noquotes and not reserved[k] and lpegmatch(propername,k) then handle(format("%s %s=%s,",depth,k,v)) else handle(format("%s [%q]=%s,",depth,k,v)) end else - if tk == "number" then -- or find(k,"^%d+$") then + if tk == "number" then if hexify then handle(format("%s [0x%04X]=%q,",depth,k,v)) else @@ -534,7 +539,7 @@ local function do_serialize(root,name,depth,level,indexed) end elseif tk == "boolean" then handle(format("%s [%s]=%q,",depth,tostring(k),v)) - elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then + elseif noquotes and not reserved[k] and lpegmatch(propername,k) then handle(format("%s %s=%q,",depth,k,v)) else handle(format("%s [%q]=%q,",depth,k,v)) @@ -542,7 +547,7 @@ local function do_serialize(root,name,depth,level,indexed) end elseif t == "table" then if not next(v) then - if tk == "number" then 
-- or find(k,"^%d+$") then + if tk == "number" then if hexify then handle(format("%s [0x%04X]={},",depth,k)) else @@ -550,7 +555,7 @@ local function do_serialize(root,name,depth,level,indexed) end elseif tk == "boolean" then handle(format("%s [%s]={},",depth,tostring(k))) - elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then + elseif noquotes and not reserved[k] and lpegmatch(propername,k) then handle(format("%s %s={},",depth,k)) else handle(format("%s [%q]={},",depth,k)) @@ -558,15 +563,15 @@ local function do_serialize(root,name,depth,level,indexed) elseif inline then local st = simple_table(v) if st then - if tk == "number" then -- or find(k,"^%d+$") then + if tk == "number" then if hexify then handle(format("%s [0x%04X]={ %s },",depth,k,concat(st,", "))) else handle(format("%s [%s]={ %s },",depth,k,concat(st,", "))) end - elseif tk == "boolean" then -- or find(k,"^%d+$") then + elseif tk == "boolean" then handle(format("%s [%s]={ %s },",depth,tostring(k),concat(st,", "))) - elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then + elseif noquotes and not reserved[k] and lpegmatch(propername,k) then handle(format("%s %s={ %s },",depth,k,concat(st,", "))) else handle(format("%s [%q]={ %s },",depth,k,concat(st,", "))) @@ -578,15 +583,15 @@ local function do_serialize(root,name,depth,level,indexed) do_serialize(v,k,depth,level+1) end elseif t == "boolean" then - if tk == "number" then -- or find(k,"^%d+$") then + if tk == "number" then if hexify then handle(format("%s [0x%04X]=%s,",depth,k,tostring(v))) else handle(format("%s [%s]=%s,",depth,k,tostring(v))) end - elseif tk == "boolean" then -- or find(k,"^%d+$") then + elseif tk == "boolean" then handle(format("%s [%s]=%s,",depth,tostring(k),tostring(v))) - elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then + elseif noquotes and not reserved[k] and lpegmatch(propername,k) then handle(format("%s %s=%s,",depth,k,tostring(v))) else handle(format("%s 
[%q]=%s,",depth,k,tostring(v))) @@ -595,30 +600,30 @@ local function do_serialize(root,name,depth,level,indexed) if functions then local f = getinfo(v).what == "C" and dump(dummy) or dump(v) -- local f = getinfo(v).what == "C" and dump(function(...) return v(...) end) or dump(v) - if tk == "number" then -- or find(k,"^%d+$") then + if tk == "number" then if hexify then - handle(format("%s [0x%04X]=loadstring(%q),",depth,k,f)) + handle(format("%s [0x%04X]=load(%q),",depth,k,f)) else - handle(format("%s [%s]=loadstring(%q),",depth,k,f)) + handle(format("%s [%s]=load(%q),",depth,k,f)) end elseif tk == "boolean" then - handle(format("%s [%s]=loadstring(%q),",depth,tostring(k),f)) - elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then - handle(format("%s %s=loadstring(%q),",depth,k,f)) + handle(format("%s [%s]=load(%q),",depth,tostring(k),f)) + elseif noquotes and not reserved[k] and lpegmatch(propername,k) then + handle(format("%s %s=load(%q),",depth,k,f)) else - handle(format("%s [%q]=loadstring(%q),",depth,k,f)) + handle(format("%s [%q]=load(%q),",depth,k,f)) end end else - if tk == "number" then -- or find(k,"^%d+$") then + if tk == "number" then if hexify then handle(format("%s [0x%04X]=%q,",depth,k,tostring(v))) else handle(format("%s [%s]=%q,",depth,k,tostring(v))) end - elseif tk == "boolean" then -- or find(k,"^%d+$") then + elseif tk == "boolean" then handle(format("%s [%s]=%q,",depth,tostring(k),tostring(v))) - elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then + elseif noquotes and not reserved[k] and lpegmatch(propername,k) then handle(format("%s %s=%q,",depth,k,tostring(v))) else handle(format("%s [%q]=%q,",depth,k,tostring(v))) @@ -699,14 +704,14 @@ local function serialize(_handle,root,name,specification) -- handle wins handle("}") end ---~ name: ---~ ---~ true : return { } ---~ false : { } ---~ nil : t = { } ---~ string : string = { } ---~ 'return' : return { } ---~ number : [number] = { } +-- name: +-- +-- true : return { 
} +-- false : { } +-- nil : t = { } +-- string : string = { } +-- "return" : return { } +-- number : [number] = { } function table.serialize(root,name,specification) local t, n = { }, 0 @@ -795,7 +800,7 @@ table.flattened = flattened local function unnest(t,f) -- only used in mk, for old times sake if not f then -- and only relevant for token lists - f = { } + f = { } -- this one can become obsolete end for i=1,#t do local v = t[i] @@ -824,7 +829,7 @@ local function are_equal(a,b,n,m) -- indexed local ai, bi = a[i], b[i] if ai==bi then -- same - elseif type(ai)=="table" and type(bi)=="table" then + elseif type(ai) == "table" and type(bi) == "table" then if not are_equal(ai,bi) then return false end @@ -859,10 +864,10 @@ table.are_equal = are_equal -- maybe also make a combined one -function table.compact(t) +function table.compact(t) -- remove empty tables, assumes subtables if t then - for k,v in next, t do - if not next(v) then + for k, v in next, t do + if not next(v) then -- no type checking t[k] = nil end end @@ -901,7 +906,7 @@ function table.swapped(t,s) -- hash return n end -function table.mirror(t) -- hash +function table.mirrored(t) -- hash local n = { } for k, v in next, t do n[v] = k @@ -924,6 +929,17 @@ function table.reversed(t) end end +function table.reverse(t) + if t then + local n = #t + for i=1,floor(n/2) do + local j = n - i + 1 + t[i], t[j] = t[j], t[i] + end + return t + end +end + function table.sequenced(t,sep) -- hash only if t then local s, n = { }, 0 diff --git a/tex/context/base/l-unicode.lua b/tex/context/base/l-unicode.lua index 7fd380b88..7c452ef8f 100644 --- a/tex/context/base/l-unicode.lua +++ b/tex/context/base/l-unicode.lua @@ -10,29 +10,45 @@ if not modules then modules = { } end modules ['l-unicode'] = { -- todo: utf.sub replacement (used in syst-aux) -local concat = table.concat +-- we put these in the utf namespace: + +utf = utf or (unicode and unicode.utf8) or { } + +utf.characters = utf.characters or string.utfcharacters 
+utf.values = utf.values or string.utfvalues + +-- string.utfvalues +-- string.utfcharacters +-- string.characters +-- string.characterpairs +-- string.bytes +-- string.bytepairs + local type = type -local P, C, R, Cs, Ct, Cmt = lpeg.P, lpeg.C, lpeg.R, lpeg.Cs, lpeg.Ct, lpeg.Cmt +local char, byte, format, sub = string.char, string.byte, string.format, string.sub +local concat = table.concat +local P, C, R, Cs, Ct, Cmt, Cc, Carg = lpeg.P, lpeg.C, lpeg.R, lpeg.Cs, lpeg.Ct, lpeg.Cmt, lpeg.Cc, lpeg.Carg local lpegmatch, patterns = lpeg.match, lpeg.patterns -local utftype = patterns.utftype -local char, byte, find, bytepairs, utfvalues, format, sub = string.char, string.byte, string.find, string.bytepairs, string.utfvalues, string.format, string.sub -local utfsplitlines = string.utfsplitlines -if not unicode then +local bytepairs = string.bytepairs - unicode = { } +local finder = lpeg.finder +local replacer = lpeg.replacer -end - -local unicode = unicode +local utfvalues = utf.values +local utfgmatch = utf.gmatch -- not always present -utf = utf or unicode.utf8 +local p_utftype = patterns.utftype +local p_utfoffset = patterns.utfoffset +local p_utf8char = patterns.utf8char +local p_utf8byte = patterns.utf8byte +local p_utfbom = patterns.utfbom +local p_newline = patterns.newline +local p_whitespace = patterns.whitespace -if not utf then +if not unicode then - utf8 = { } - unicode.utf8 = utf8 - utf = utf8 + unicode = { utf = utf } -- for a while end @@ -89,64 +105,13 @@ if not utf.byte then end -if not utf.sub then - - local utf8char = patterns.utf8char - - -- inefficient as lpeg just copies ^n - - -- local function sub(str,start,stop) - -- local pattern = utf8char^-(start-1) * C(utf8char^-(stop-start+1)) - -- inspect(pattern) - -- return lpegmatch(pattern,str) or "" - -- end - - local b, e, n, first, last = 0, 0, 0, 0, 0 - - local function slide(s,p) - n = n + 1 - if n == first then - b = p - if not last then - return nil - end - end - if n == last then - e = p - return 
nil - else - return p - end - end - - local pattern = Cmt(utf8char,slide)^0 - - function utf.sub(str,start,stop) -- todo: from the end - if not start then - return str - end - b, e, n, first, last = 0, 0, 0, start, stop - lpegmatch(pattern,str) - if not stop then - return sub(str,b) - else - return sub(str,b,e) - end - end - - -- print(utf.sub("Hans Hagen is my name")) - -- print(utf.sub("Hans Hagen is my name",5)) - -- print(utf.sub("Hans Hagen is my name",5,10)) - -end - local utfchar, utfbyte = utf.char, utf.byte -- As we want to get rid of the (unmaintained) utf library we implement our own -- variants (in due time an independent module): -function unicode.filetype(data) - return data and lpegmatch(utftype,data) or "unknown" +function utf.filetype(data) + return data and lpegmatch(p_utftype,data) or "unknown" end local toentities = Cs ( @@ -257,7 +222,7 @@ local pattern = P("\254\255") * Cs( ( + one )^1 ) -function string.toutf(s) +function string.toutf(s) -- in string namespace return lpegmatch(pattern,s) or s -- todo: utf32 end @@ -273,26 +238,269 @@ local validatedutf = Cs ( patterns.validatedutf = validatedutf -function string.validutf(str) - return lpegmatch(validatedutf,str) +function utf.is_valid(str) + return type(str) == "string" and lpegmatch(validatedutf,str) or false end +if not utf.len then -utf.length = string.utflength -utf.split = string.utfsplit -utf.splitines = string.utfsplitlines -utf.valid = string.validutf + -- -- alternative 1: 0.77 + -- + -- local utfcharcounter = utfbom^-1 * Cs((p_utf8char/'!')^0) + -- + -- function utf.len(str) + -- return #lpegmatch(utfcharcounter,str or "") + -- end + -- + -- -- alternative 2: 1.70 + -- + -- local n = 0 + -- + -- local utfcharcounter = utfbom^-1 * (p_utf8char/function() n = n + 1 end)^0 -- slow + -- + -- function utf.length(str) + -- n = 0 + -- lpegmatch(utfcharcounter,str or "") + -- return n + -- end + -- + -- -- alternative 3: 0.24 (native unicode.utf8.len: 0.047) + + -- local n = 0 + -- + -- -- 
local utfcharcounter = lpeg.patterns.utfbom^-1 * P ( ( Cp() * ( + -- -- patterns.utf8one ^1 * Cc(1) + -- -- + patterns.utf8two ^1 * Cc(2) + -- -- + patterns.utf8three^1 * Cc(3) + -- -- + patterns.utf8four ^1 * Cc(4) ) * Cp() / function(f,d,t) n = n + (t - f)/d end + -- -- )^0 ) -- just as many captures as below + -- + -- -- local utfcharcounter = lpeg.patterns.utfbom^-1 * P ( ( + -- -- (Cmt(patterns.utf8one ^1,function(_,_,s) n = n + #s return true end)) + -- -- + (Cmt(patterns.utf8two ^1,function(_,_,s) n = n + #s/2 return true end)) + -- -- + (Cmt(patterns.utf8three^1,function(_,_,s) n = n + #s/3 return true end)) + -- -- + (Cmt(patterns.utf8four ^1,function(_,_,s) n = n + #s/4 return true end)) + -- -- )^0 ) -- not interesting as it creates strings but sometimes faster + -- + -- -- The best so far: + -- + -- local utfcharcounter = utfbom^-1 * P ( ( + -- Cp() * (patterns.utf8one )^1 * Cp() / function(f,t) n = n + t - f end + -- + Cp() * (patterns.utf8two )^1 * Cp() / function(f,t) n = n + (t - f)/2 end + -- + Cp() * (patterns.utf8three)^1 * Cp() / function(f,t) n = n + (t - f)/3 end + -- + Cp() * (patterns.utf8four )^1 * Cp() / function(f,t) n = n + (t - f)/4 end + -- )^0 ) + + -- function utf.len(str) + -- n = 0 + -- lpegmatch(utfcharcounter,str or "") + -- return n + -- end + + local n, f = 0, 1 + + local utfcharcounter = patterns.utfbom^-1 * Cmt ( + Cc(1) * patterns.utf8one ^1 + + Cc(2) * patterns.utf8two ^1 + + Cc(3) * patterns.utf8three^1 + + Cc(4) * patterns.utf8four ^1, + function(_,t,d) -- due to Cc no string captures, so faster + n = n + (t - f)/d + f = t + return true + end + )^0 + + function utf.len(str) + n, f = 0, 1 + lpegmatch(utfcharcounter,str or "") + return n + end -if not utf.len then - utf.len = utf.length end --- a replacement for simple gsubs: +utf.length = utf.len + +if not utf.sub then + + -- inefficient as lpeg just copies ^n + + -- local function sub(str,start,stop) + -- local pattern = p_utf8char^-(start-1) * 
C(p_utf8char^-(stop-start+1)) + -- inspect(pattern) + -- return lpegmatch(pattern,str) or "" + -- end + + -- local b, e, n, first, last = 0, 0, 0, 0, 0 + -- + -- local function slide(s,p) + -- n = n + 1 + -- if n == first then + -- b = p + -- if not last then + -- return nil + -- end + -- end + -- if n == last then + -- e = p + -- return nil + -- else + -- return p + -- end + -- end + -- + -- local pattern = Cmt(p_utf8char,slide)^0 + -- + -- function utf.sub(str,start,stop) -- todo: from the end + -- if not start then + -- return str + -- end + -- b, e, n, first, last = 0, 0, 0, start, stop + -- lpegmatch(pattern,str) + -- if not stop then + -- return sub(str,b) + -- else + -- return sub(str,b,e-1) + -- end + -- end + + -- print(utf.sub("Hans Hagen is my name")) + -- print(utf.sub("Hans Hagen is my name",5)) + -- print(utf.sub("Hans Hagen is my name",5,10)) + + local utflength = utf.length + + -- also negative indices, upto 10 times slower than a c variant + + local b, e, n, first, last = 0, 0, 0, 0, 0 + + local function slide_zero(s,p) + n = n + 1 + if n >= last then + e = p - 1 + else + return p + end + end + + local function slide_one(s,p) + n = n + 1 + if n == first then + b = p + end + if n >= last then + e = p - 1 + else + return p + end + end + + local function slide_two(s,p) + n = n + 1 + if n == first then + b = p + else + return true + end + end + + local pattern_zero = Cmt(p_utf8char,slide_zero)^0 + local pattern_one = Cmt(p_utf8char,slide_one )^0 + local pattern_two = Cmt(p_utf8char,slide_two )^0 + + function utf.sub(str,start,stop) + if not start then + return str + end + if start == 0 then + start = 1 + end + if not stop then + if start < 0 then + local l = utflength(str) -- we can inline this function if needed + start = l + start + else + start = start - 1 + end + b, n, first = 0, 0, start + lpegmatch(pattern_two,str) + if n >= first then + return sub(str,b) + else + return "" + end + end + if start < 0 or stop < 0 then + local l = utf.length(str) + 
if start < 0 then + start = l + start + if start <= 0 then + start = 1 + else + start = start + 1 + end + end + if stop < 0 then + stop = l + stop + if stop == 0 then + stop = 1 + else + stop = stop + 1 + end + end + end + if start > stop then + return "" + elseif start > 1 then + b, e, n, first, last = 0, 0, 0, start - 1, stop + lpegmatch(pattern_one,str) + if n >= first and e == 0 then + e = #str + end + return sub(str,b,e) + else + b, e, n, last = 1, 0, 0, stop + lpegmatch(pattern_zero,str) + if e == 0 then + e = #str + end + return sub(str,b,e) + end + end -local utf8char = patterns.utf8char + -- local n = 100000 + -- local str = string.rep("123456àáâãäå",100) + -- + -- for i=-15,15,1 do + -- for j=-15,15,1 do + -- if utf.xsub(str,i,j) ~= utf.sub(str,i,j) then + -- print("error",i,j,"l>"..utf.xsub(str,i,j),"s>"..utf.sub(str,i,j)) + -- end + -- end + -- if utf.xsub(str,i) ~= utf.sub(str,i) then + -- print("error",i,"l>"..utf.xsub(str,i),"s>"..utf.sub(str,i)) + -- end + -- end + + -- print(" 1, 7",utf.xsub(str, 1, 7),utf.sub(str, 1, 7)) + -- print(" 0, 7",utf.xsub(str, 0, 7),utf.sub(str, 0, 7)) + -- print(" 0, 9",utf.xsub(str, 0, 9),utf.sub(str, 0, 9)) + -- print(" 4 ",utf.xsub(str, 4 ),utf.sub(str, 4 )) + -- print(" 0 ",utf.xsub(str, 0 ),utf.sub(str, 0 )) + -- print(" 0, 0",utf.xsub(str, 0, 0),utf.sub(str, 0, 0)) + -- print(" 4, 4",utf.xsub(str, 4, 4),utf.sub(str, 4, 4)) + -- print(" 4, 0",utf.xsub(str, 4, 0),utf.sub(str, 4, 0)) + -- print("-3, 0",utf.xsub(str,-3, 0),utf.sub(str,-3, 0)) + -- print(" 0,-3",utf.xsub(str, 0,-3),utf.sub(str, 0,-3)) + -- print(" 5,-3",utf.xsub(str,-5,-3),utf.sub(str,-5,-3)) + -- print("-3 ",utf.xsub(str,-3 ),utf.sub(str,-3 )) + +end + +-- a replacement for simple gsubs: function utf.remapper(mapping) - local pattern = Cs((utf8char/mapping)^0) + local pattern = Cs((p_utf8char/mapping)^0) return function(str) if not str or str == "" then return "" @@ -305,158 +513,113 @@ end -- local remap = utf.remapper { a = 'd', b = "c", c = "b", d 
= "a" } -- print(remap("abcd 1234 abcd")) +-- + +function utf.replacer(t) -- no precheck, always string builder + local r = replacer(t,false,false,true) + return function(str) + return lpegmatch(r,str) + end +end + +function utf.subtituter(t) -- with precheck and no building if no match + local f = finder (t) + local r = replacer(t,false,false,true) + return function(str) + local i = lpegmatch(f,str) + if not i then + return str + elseif i > #str then + return str + else + -- return sub(str,1,i-2) .. lpegmatch(r,str,i-1) -- slower + return lpegmatch(r,str) + end + end +end + +-- inspect(utf.split("a b c d")) +-- inspect(utf.split("a b c d",true)) + +local utflinesplitter = p_utfbom^-1 * lpeg.tsplitat(p_newline) +local utfcharsplitter_ows = p_utfbom^-1 * Ct(C(p_utf8char)^0) +local utfcharsplitter_iws = p_utfbom^-1 * Ct((p_whitespace^1 + C(p_utf8char))^0) +local utfcharsplitter_raw = Ct(C(p_utf8char)^0) + +patterns.utflinesplitter = utflinesplitter + +function utf.splitlines(str) + return lpegmatch(utflinesplitter,str or "") +end + +function utf.split(str,ignorewhitespace) -- new + if ignorewhitespace then + return lpegmatch(utfcharsplitter_iws,str or "") + else + return lpegmatch(utfcharsplitter_ows,str or "") + end +end + +function utf.totable(str) -- keeps bom + return lpegmatch(utfcharsplitter_raw,str) +end + -- 0 EF BB BF UTF-8 -- 1 FF FE UTF-16-little-endian -- 2 FE FF UTF-16-big-endian -- 3 FF FE 00 00 UTF-32-little-endian -- 4 00 00 FE FF UTF-32-big-endian - -unicode.utfname = { - [0] = 'utf-8', - [1] = 'utf-16-le', - [2] = 'utf-16-be', - [3] = 'utf-32-le', - [4] = 'utf-32-be' -} - +-- -- \000 fails in <= 5.0 but is valid in >=5.1 where %z is depricated -function unicode.utftype(f) - local str = f:read(4) - if not str then - f:seek('set') - return 0 - -- elseif find(str,"^%z%z\254\255") then -- depricated - -- elseif find(str,"^\000\000\254\255") then -- not permitted and bugged - elseif find(str,"\000\000\254\255",1,true) then -- seems to work okay (TH) - 
return 4 - -- elseif find(str,"^\255\254%z%z") then -- depricated - -- elseif find(str,"^\255\254\000\000") then -- not permitted and bugged - elseif find(str,"\255\254\000\000",1,true) then -- seems to work okay (TH) - return 3 - elseif find(str,"^\254\255") then - f:seek('set',2) - return 2 - elseif find(str,"^\255\254") then - f:seek('set',2) - return 1 - elseif find(str,"^\239\187\191") then - f:seek('set',3) - return 0 - else - f:seek('set') - return 0 +-- utf.name = { +-- [0] = 'utf-8', +-- [1] = 'utf-16-le', +-- [2] = 'utf-16-be', +-- [3] = 'utf-32-le', +-- [4] = 'utf-32-be' +-- } +-- +-- function utf.magic(f) +-- local str = f:read(4) +-- if not str then +-- f:seek('set') +-- return 0 +-- -- elseif find(str,"^%z%z\254\255") then -- depricated +-- -- elseif find(str,"^\000\000\254\255") then -- not permitted and bugged +-- elseif find(str,"\000\000\254\255",1,true) then -- seems to work okay (TH) +-- return 4 +-- -- elseif find(str,"^\255\254%z%z") then -- depricated +-- -- elseif find(str,"^\255\254\000\000") then -- not permitted and bugged +-- elseif find(str,"\255\254\000\000",1,true) then -- seems to work okay (TH) +-- return 3 +-- elseif find(str,"^\254\255") then +-- f:seek('set',2) +-- return 2 +-- elseif find(str,"^\255\254") then +-- f:seek('set',2) +-- return 1 +-- elseif find(str,"^\239\187\191") then +-- f:seek('set',3) +-- return 0 +-- else +-- f:seek('set') +-- return 0 +-- end +-- end + +function utf.magic(f) -- not used + local str = f:read(4) or "" + local off = lpegmatch(p_utfoffset,str) + if off < 4 then + f:seek('set',off) end + return lpegmatch(p_utftype,str) end ---~ function unicode.utf16_to_utf8(str, endian) -- maybe a gsub is faster or an lpeg ---~ local result, tmp, n, m, p, r, t = { }, { }, 0, 0, 0, 0, 0 -- we reuse tmp ---~ -- lf | cr | crlf / (cr:13, lf:10) ---~ local function doit() -- inline this ---~ if n == 10 then ---~ if p ~= 13 then ---~ if t > 0 then ---~ r = r + 1 ---~ result[r] = concat(tmp,"",1,t) ---~ t = 0 ---~ end 
---~ p = 0 ---~ end ---~ elseif n == 13 then ---~ if t > 0 then ---~ r = r + 1 ---~ result[r] = concat(tmp,"",1,t) ---~ t = 0 ---~ end ---~ p = n ---~ else ---~ t = t + 1 ---~ tmp[t] = utfchar(n) ---~ p = 0 ---~ end ---~ end ---~ for l,r in bytepairs(str) do ---~ if r then ---~ if endian then -- maybe make two loops ---~ n = 256*l + r ---~ else ---~ n = 256*r + l ---~ end ---~ if m > 0 then ---~ n = (m-0xD800)*0x400 + (n-0xDC00) + 0x10000 ---~ m = 0 ---~ doit() ---~ elseif n >= 0xD800 and n <= 0xDBFF then ---~ m = n ---~ else ---~ doit() ---~ end ---~ end ---~ end ---~ if t > 0 then ---~ r = r + 1 ---~ result[r] = concat(tmp,"",1,t) -- we reused tmp, hence t ---~ end ---~ return result ---~ end - ---~ function unicode.utf32_to_utf8(str, endian) ---~ local result, tmp, n, m, p, r, t = { }, { }, 0, -1, 0, 0, 0 ---~ -- lf | cr | crlf / (cr:13, lf:10) ---~ local function doit() -- inline this ---~ if n == 10 then ---~ if p ~= 13 then ---~ if t > 0 then ---~ r = r + 1 ---~ result[r] = concat(tmp,"",1,t) ---~ t = 0 ---~ end ---~ p = 0 ---~ end ---~ elseif n == 13 then ---~ if t > 0 then ---~ r = r + 1 ---~ result[r] = concat(tmp,"",1,t) ---~ t = 0 ---~ end ---~ p = n ---~ else ---~ t = t + 1 ---~ tmp[t] = utfchar(n) ---~ p = 0 ---~ end ---~ end ---~ for a,b in bytepairs(str) do ---~ if a and b then ---~ if m < 0 then ---~ if endian then -- maybe make two loops ---~ m = 256*256*256*a + 256*256*b ---~ else ---~ m = 256*b + a ---~ end ---~ else ---~ if endian then -- maybe make two loops ---~ n = m + 256*a + b ---~ else ---~ n = m + 256*256*256*b + 256*256*a ---~ end ---~ m = -1 ---~ doit() ---~ end ---~ else ---~ break ---~ end ---~ end ---~ if #tmp > 0 then ---~ r = r + 1 ---~ result[r] = concat(tmp,"",1,t) -- we reused tmp, hence t ---~ end ---~ return result ---~ end - local function utf16_to_utf8_be(t) if type(t) == "string" then - t = utfsplitlines(str) + t = lpegmatch(utflinesplitter,t) end local result = { } -- we reuse result for i=1,#t do @@ -484,7 +647,7 @@ end 
local function utf16_to_utf8_le(t) if type(t) == "string" then - t = utfsplitlines(str) + t = lpegmatch(utflinesplitter,t) end local result = { } -- we reuse result for i=1,#t do @@ -512,7 +675,7 @@ end local function utf32_to_utf8_be(t) if type(t) == "string" then - t = utfsplitlines(t) + t = lpegmatch(utflinesplitter,t) end local result = { } -- we reuse result for i=1,#t do @@ -537,7 +700,7 @@ end local function utf32_to_utf8_le(t) if type(t) == "string" then - t = utfsplitlines(t) + t = lpegmatch(utflinesplitter,t) end local result = { } -- we reuse result for i=1,#t do @@ -560,20 +723,20 @@ local function utf32_to_utf8_le(t) return t end -unicode.utf32_to_utf8_be = utf32_to_utf8_be -unicode.utf32_to_utf8_le = utf32_to_utf8_le -unicode.utf16_to_utf8_be = utf16_to_utf8_be -unicode.utf16_to_utf8_le = utf16_to_utf8_le +utf.utf32_to_utf8_be = utf32_to_utf8_be +utf.utf32_to_utf8_le = utf32_to_utf8_le +utf.utf16_to_utf8_be = utf16_to_utf8_be +utf.utf16_to_utf8_le = utf16_to_utf8_le -function unicode.utf8_to_utf8(t) - return type(t) == "string" and utfsplitlines(t) or t +function utf.utf8_to_utf8(t) + return type(t) == "string" and lpegmatch(utflinesplitter,t) or t end -function unicode.utf16_to_utf8(t,endian) +function utf.utf16_to_utf8(t,endian) return endian and utf16_to_utf8_be(t) or utf16_to_utf8_le(t) or t end -function unicode.utf32_to_utf8(t,endian) +function utf.utf32_to_utf8(t,endian) return endian and utf32_to_utf8_be(t) or utf32_to_utf8_le(t) or t end @@ -599,7 +762,7 @@ local function big(c) end end --- function unicode.utf8_to_utf16(str,littleendian) +-- function utf.utf8_to_utf16(str,littleendian) -- if littleendian then -- return char(255,254) .. utfgsub(str,".",little) -- else @@ -610,7 +773,7 @@ end local _, l_remap = utf.remapper(little) local _, b_remap = utf.remapper(big) -function unicode.utf8_to_utf16(str,littleendian) +function utf.utf8_to_utf16(str,littleendian) if littleendian then return char(255,254) .. 
lpegmatch(l_remap,str) else @@ -618,27 +781,67 @@ function unicode.utf8_to_utf16(str,littleendian) end end -function unicode.utfcodes(str) - local t, n = { }, 0 - for u in utfvalues(str) do - n = n + 1 - t[n] = format("0x%04X",u) - end - return concat(t,separator or " ") +-- function utf.tocodes(str,separator) -- can be sped up with an lpeg +-- local t, n = { }, 0 +-- for u in utfvalues(str) do +-- n = n + 1 +-- t[n] = format("0x%04X",u) +-- end +-- return concat(t,separator or " ") +-- end + +local pattern = Cs ( + (p_utf8byte / function(unicode ) return format( "0x%04X", unicode) end) * + (p_utf8byte * Carg(1) / function(unicode,separator) return format("%s0x%04X",separator,unicode) end)^0 +) + +function utf.tocodes(str,separator) + return lpegmatch(pattern,str,1,separator or " ") end -function unicode.ustring(s) +function utf.ustring(s) return format("U+%05X",type(s) == "number" and s or utfbyte(s)) end -function unicode.xstring(s) +function utf.xstring(s) return format("0x%05X",type(s) == "number" and s or utfbyte(s)) end -- -local pattern = Ct(C(patterns.utf8char)^0) +local p_nany = p_utf8char / "" + +if utfgmatch then + + function utf.count(str,what) + if type(what) == "string" then + local n = 0 + for _ in utfgmatch(str,what) do + n = n + 1 + end + return n + else -- 4 times slower but still faster than / function + return #lpegmatch(Cs((P(what)/" " + p_nany)^0),str) + end + end + +else + + local cache = { } + + function utf.count(str,what) + if type(what) == "string" then + local p = cache[what] + if not p then + p = Cs((P(what)/" " + p_nany)^0) + cache[p] = p + end + return #lpegmatch(p,str) + else -- 4 times slower but still faster than / function + return #lpegmatch(Cs((P(what)/" " + p_nany)^0),str) + end + end -function utf.totable(str) - return lpegmatch(pattern,str) end + +-- maybe also register as string.utf* diff --git a/tex/context/base/lang-ini.lua b/tex/context/base/lang-ini.lua index eaedcd69a..02a33c181 100644 --- 
a/tex/context/base/lang-ini.lua +++ b/tex/context/base/lang-ini.lua @@ -17,7 +17,6 @@ if not modules then modules = { } end modules ['lang-ini'] = { --~ lang:hyphenation(string) string = lang:hyphenation() lang:clear_hyphenation() local type, tonumber = type, tonumber -local utf = unicode.utf8 local utfbyte = utf.byte local format, gsub = string.format, string.gsub local concat, sortedkeys, sortedpairs = table.concat, table.sortedkeys, table.sortedpairs diff --git a/tex/context/base/lang-url.lua b/tex/context/base/lang-url.lua index 3b354216a..35381e672 100644 --- a/tex/context/base/lang-url.lua +++ b/tex/context/base/lang-url.lua @@ -6,10 +6,7 @@ if not modules then modules = { } end modules ['lang-url'] = { license = "see context related readme files" } -local utf = unicode.utf8 - -local utfcharacters, utfvalues = string.utfcharacters, string.utfvalues -local utfbyte, utfchar = utf.byte, utf.char +local utfcharacters, utfvalues, utfbyte, utfchar = utf.characters, utf.values, utf.byte, utf.char context = context diff --git a/tex/context/base/lang-wrd.lua b/tex/context/base/lang-wrd.lua index c5bc75ca2..84d6107d4 100644 --- a/tex/context/base/lang-wrd.lua +++ b/tex/context/base/lang-wrd.lua @@ -6,8 +6,8 @@ if not modules then modules = { } end modules ['lang-wrd'] = { license = "see context related readme files" } -local utf = unicode.utf8 -local lower, utfchar = string.lower, utf.char +local lower = string.lower +local utfchar = utf.char local concat = table.concat local lpegmatch = lpeg.match local P, S, Cs = lpeg.P, lpeg.S, lpeg.Cs diff --git a/tex/context/base/lpdf-epa.lua b/tex/context/base/lpdf-epa.lua index 03a36f2dc..93e494a31 100644 --- a/tex/context/base/lpdf-epa.lua +++ b/tex/context/base/lpdf-epa.lua @@ -12,6 +12,8 @@ if not modules then modules = { } end modules ['lpdf-epa'] = { local type, tonumber = type, tonumber local format, gsub = string.format, string.gsub +----- lpegmatch, lpegpatterns = lpeg.match, lpeg.patterns + local trace_links = false 
trackers.register("figures.links", function(v) trace_links = v end) local report_link = logs.reporter("backend","merging") @@ -20,6 +22,9 @@ local backends, lpdf = backends, lpdf local variables = interfaces.variables local codeinjections = backends.pdf.codeinjections +----- urlescaper = lpegpatterns.urlescaper +----- utftohigh = lpegpatterns.utftohigh +local escapetex = characters.filters.utf.private.escape local layerspec = { -- predefining saves time "epdflinks" @@ -53,19 +58,22 @@ local function add_link(x,y,w,h,destination,what) end local function link_goto(x,y,w,h,document,annotation,pagedata,namespace) - local destination = annotation.A.D -- [ 18 0 R /Fit ] - local what = "page" - if type(destination) == "string" then - local destinations = document.destinations - local wanted = destinations[destination] - destination = wanted and wanted.D - if destination then what = "named" end - end - local pagedata = destination and destination[1] - if pagedata then - local destinationpage = pagedata.number - if destinationpage then - add_link(x,y,w,h,namespace .. destinationpage,what) + local a = annotation.A + if a then + local destination = a.D -- [ 18 0 R /Fit ] + local what = "page" + if type(destination) == "string" then + local destinations = document.destinations + local wanted = destinations[destination] + destination = wanted and wanted.D + if destination then what = "named" end + end + local pagedata = destination and destination[1] + if pagedata then + local destinationpage = pagedata.number + if destinationpage then + add_link(x,y,w,h,namespace .. 
destinationpage,what) + end end end end @@ -73,24 +81,31 @@ end local function link_uri(x,y,w,h,document,annotation) local url = annotation.A.URI if url then + -- url = lpegmatch(urlescaper,url) + -- url = lpegmatch(utftohigh,url) + url = escapetex(url) add_link(x,y,w,h,format("url(%s)",url),"url") end end local function link_file(x,y,w,h,document,annotation) - local filename = annotation.A.F - if filename then - local destination = annotation.A.D - if not destination then - add_link(x,y,w,h,format("file(%s)",filename),"file") - elseif type(destination) == "string" then - add_link(x,y,w,h,format("%s::%s",filename,destination),"file (named)") - else - destination = destination[1] -- array - if tonumber(destination) then - add_link(x,y,w,h,format("%s::page(%s)",filename,destination),"file (page)") - else + local a = annotation.A + if a then + local filename = a.F + if filename then + filename = escapetex(filename) + local destination = a.D + if not destination then add_link(x,y,w,h,format("file(%s)",filename),"file") + elseif type(destination) == "string" then + add_link(x,y,w,h,format("%s::%s",filename,destination),"file (named)") + else + destination = destination[1] -- array + if tonumber(destination) then + add_link(x,y,w,h,format("%s::page(%s)",filename,destination),"file (page)") + else + add_link(x,y,w,h,format("file(%s)",filename),"file") + end end end end @@ -110,41 +125,50 @@ function codeinjections.mergereferences(specification) local yscale = specification.yscale or 1 local size = specification.size or "crop" -- todo local pagedata = document.pages[pagenumber] - local annotations = pagedata.Annots + local annotations = pagedata and pagedata.Annots if annotations and annotations.n > 0 then - local namespace = format("lpdf-epa-%s-",file.removesuffix(file.basename(fullname))) - local reference = namespace .. pagenumber + local namespace = format("lpdf-epa-%s-",file.removesuffix(file.basename(fullname))) + local reference = namespace .. 
pagenumber local mediabox = pagedata.MediaBox local llx, lly, urx, ury = mediabox[1], mediabox[2], mediabox[3], mediabox[4] local width, height = xscale * (urx - llx), yscale * (ury - lly) -- \\overlaywidth, \\overlayheight context.definelayer( { "epdflinks" }, { height = height.."bp" , width = width.."bp" }) for i=1,annotations.n do local annotation = annotations[i] - local subtype = annotation.Subtype - local rectangle = annotation.Rect - local a_llx, a_lly, a_urx, a_ury = rectangle[1], rectangle[2], rectangle[3], rectangle[4] - local x, y = xscale * (a_llx - llx), yscale * (a_lly - lly) - local w, h = xscale * (a_urx - a_llx), yscale * (a_ury - a_lly) - if subtype == "Link" then - local linktype = annotation.A.S - if linktype == "GoTo" then - link_goto(x,y,w,h,document,annotation,pagedata,namespace) - elseif linktype == "GoToR" then - link_file(x,y,w,h,document,annotation) - elseif linktype == "URI" then - link_uri(x,y,w,h,document,annotation) + if annotation then + local subtype = annotation.Subtype + local rectangle = annotation.Rect + local a_llx, a_lly, a_urx, a_ury = rectangle[1], rectangle[2], rectangle[3], rectangle[4] + local x, y = xscale * (a_llx - llx), yscale * (a_lly - lly) + local w, h = xscale * (a_urx - a_llx), yscale * (a_ury - a_lly) + if subtype == "Link" then + local a = annotation.A + if a then + local linktype = a.S + if linktype == "GoTo" then + link_goto(x,y,w,h,document,annotation,pagedata,namespace) + elseif linktype == "GoToR" then + link_file(x,y,w,h,document,annotation) + elseif linktype == "URI" then + link_uri(x,y,w,h,document,annotation) + elseif trace_links then + report_link("unsupported link annotation %q",linktype) + end + else + report_link("mising link annotation") + end elseif trace_links then - report_link("unsupported link annotation '%s'",linktype) + report_link("unsupported annotation %q",subtype) end elseif trace_links then - report_link("unsupported annotation '%s'",subtype) + report_link("broken annotation, index: 
%i",i) end end context.flushlayer { "epdflinks" } -- context("\\gdef\\figurereference{%s}",reference) -- global context.setgvalue("figurereference",reference) -- global if trace_links then - report_link("setting figure reference to '%s'",reference) + report_link("setting figure reference to %q",reference) end specification.reference = reference return namespace @@ -171,19 +195,24 @@ function codeinjections.mergeviewerlayers(specification) local layers = document.layers if layers then for i=1,layers.n do - local tag = namespace .. gsub(layers[i]," ",":") - local title = tag - if trace_links then - report_link("using layer '%s'",tag) + local layer = layers[i] + if layer then + local tag = namespace .. gsub(layer," ",":") + local title = tag + if trace_links then + report_link("using layer %q",tag) + end + attributes.viewerlayers.define { -- also does some cleaning + tag = tag, -- todo: #3A or so + title = title, + visible = variables.start, + editable = variables.yes, + printable = variables.yes, + } + codeinjections.useviewerlayer(tag) + elseif trace_links then + report_link("broken layer, index: %i",i) end - attributes.viewerlayers.define { -- also does some cleaning - tag = tag, -- todo: #3A or so - title = title, - visible = variables.start, - editable = variables.yes, - printable = variables.yes, - } - codeinjections.useviewerlayer(tag) end end end diff --git a/tex/context/base/lpdf-epd.lua b/tex/context/base/lpdf-epd.lua index 1c4b4b5c5..b9f8cfc7c 100644 --- a/tex/context/base/lpdf-epd.lua +++ b/tex/context/base/lpdf-epd.lua @@ -27,6 +27,8 @@ local lower, match, char, find, sub = string.lower, string.match, string.char, s local concat = table.concat local toutf = string.toutf +local report_epdf = logs.reporter("epdf") + -- a bit of protection local limited = false @@ -59,9 +61,7 @@ local function prepare(document,d,t,n,k) for i=1,n do local v = d:getVal(i) local r = d:getValNF(i) - if r:getTypeName() ~= "ref" then - t[d:getKey(i)] = 
checked_access[v:getTypeName()](v,document) - else + if r:getTypeName() == "ref" then r = r:getRef().num local c = document.cache[r] if c then @@ -74,6 +74,8 @@ local function prepare(document,d,t,n,k) end end t[d:getKey(i)] = c + else + t[d:getKey(i)] = checked_access[v:getTypeName()](v,document) end end getmetatable(t).__index = nil @@ -95,9 +97,9 @@ local function prepare(document,a,t,n,k) for i=1,n do local v = a:get(i) local r = a:getNF(i) - if r:getTypeName() ~= "ref" then - t[i] = checked_access[v:getTypeName()](v,document) - else + if v:getTypeName() == "null" then + -- TH: weird, but appears possible + elseif r:getTypeName() == "ref" then r = r:getRef().num local c = document.cache[r] if c then @@ -108,6 +110,8 @@ local function prepare(document,a,t,n,k) document.xrefs[c] = r end t[i] = c + else + t[i] = checked_access[v:getTypeName()](v,document) end end getmetatable(t).__index = nil @@ -267,10 +271,14 @@ local function getpages(document) for pagenumber=1,nofpages do local pagereference = cata:getPageRef(pagenumber).num local pagedata = some_dictionary(xref:fetch(pagereference,0):getDict(),document,pagereference) - pagedata.number = pagenumber - pages[pagenumber] = pagedata - xrefs[pagedata] = pagereference - cache[pagereference] = pagedata + if pagedata then + pagedata.number = pagenumber + pages[pagenumber] = pagedata + xrefs[pagedata] = pagereference + cache[pagereference] = pagedata + else + report_epdf("missing pagedata at slot %i",i) + end end pages.n = nofpages return pages diff --git a/tex/context/base/lpdf-ini.lua b/tex/context/base/lpdf-ini.lua index 1591d6da1..c1b742949 100644 --- a/tex/context/base/lpdf-ini.lua +++ b/tex/context/base/lpdf-ini.lua @@ -8,8 +8,7 @@ if not modules then modules = { } end modules ['lpdf-ini'] = { local setmetatable, getmetatable, type, next, tostring, tonumber, rawset = setmetatable, getmetatable, type, next, tostring, tonumber, rawset local char, byte, format, gsub, concat, match, sub, gmatch = string.char, 
string.byte, string.format, string.gsub, table.concat, string.match, string.sub, string.gmatch -local utfvalues = string.utfvalues -local utfchar = utf.char +local utfchar, utfvalues = utf.char, utf.values local sind, cosd = math.sind, math.cosd local lpegmatch, P, C, R, S, Cc, Cs = lpeg.match, lpeg.P, lpeg.C, lpeg.R, lpeg.S, lpeg.Cc, lpeg.Cs diff --git a/tex/context/base/lpdf-swf.lua b/tex/context/base/lpdf-swf.lua index 4bbec8dbe..d3c2b41da 100644 --- a/tex/context/base/lpdf-swf.lua +++ b/tex/context/base/lpdf-swf.lua @@ -108,7 +108,7 @@ local function insertswf(spec) local names = configuration.Assets.Names local prefix = false if root ~= "" and root ~= "." then - prefix = format("^%s/",string.escapedpattern(root,true)) + prefix = format("^%s/",string.topattern(root)) end if prefix and trace_swf then report_swf("using strip pattern '%s'",prefix) diff --git a/tex/context/base/luat-bas.mkiv b/tex/context/base/luat-bas.mkiv index 4d6f6d03b..e24568b0a 100644 --- a/tex/context/base/luat-bas.mkiv +++ b/tex/context/base/luat-bas.mkiv @@ -13,10 +13,11 @@ \writestatus{loading}{ConTeXt Lua Macros / Basic Lua Libraries} +\registerctxluafile{l-lua} {1.001} +\registerctxluafile{l-lpeg} {1.001} \registerctxluafile{l-function}{1.001} \registerctxluafile{l-string} {1.001} \registerctxluafile{l-table} {1.001} -\registerctxluafile{l-lpeg} {1.001} \registerctxluafile{l-boolean} {1.001} \registerctxluafile{l-number} {1.001} \registerctxluafile{l-math} {1.001} diff --git a/tex/context/base/luat-cnf.lua b/tex/context/base/luat-cnf.lua index 07a5d51f2..e574bf428 100644 --- a/tex/context/base/luat-cnf.lua +++ b/tex/context/base/luat-cnf.lua @@ -30,22 +30,22 @@ texconfig.param_size = 25000 -- 60 texconfig.save_size = 50000 -- 4000 texconfig.stack_size = 10000 -- 300 ---~ local function initialize() ---~ local t, variable = allocate(), resolvers.variable ---~ for name, default in next, variablenames do ---~ local name = variablenames[i] ---~ local value = variable(name) ---~ value = 
tonumber(value) ---~ if not value or value == "" or value == 0 then ---~ value = default ---~ end ---~ texconfig[name], t[name] = value, value ---~ end ---~ initialize = nil ---~ return t ---~ end - ---~ luatex.variables = initialize() +-- local function initialize() +-- local t, variable = allocate(), resolvers.variable +-- for name, default in next, variablenames do +-- local name = variablenames[i] +-- local value = variable(name) +-- value = tonumber(value) +-- if not value or value == "" or value == 0 then +-- value = default +-- end +-- texconfig[name], t[name] = value, value +-- end +-- initialize = nil +-- return t +-- end +-- +-- luatex.variables = initialize() local stub = [[ @@ -71,9 +71,9 @@ function texconfig.init() local builtin, globals = { }, { } - libraries = { -- we set it her as we want libraries also 'indexed' + libraries = { -- we set it here as we want libraries also 'indexed' basiclua = { - "string", "table", "coroutine", "debug", "file", "io", "lpeg", "math", "os", "package", + "string", "table", "coroutine", "debug", "file", "io", "lpeg", "math", "os", "package", "bit32", }, basictex = { -- noad "callback", "font", "img", "lang", "lua", "node", "pdf", "status", "tex", "texconfig", "texio", "token", @@ -88,6 +88,14 @@ function texconfig.init() "fontforge", -- can be filled by luat-log "kpse", }, + functions = { + "assert", "pcall", "xpcall", "error", "collectgarbage", + "dofile", "load","loadfile", "require", "module", + "getmetatable", "setmetatable", + "ipairs", "pairs", "rawequal", "rawget", "rawset", "next", + "tonumber", "tostring", + "type", "unpack", "select", "print", + }, builtin = builtin, -- to be filled globals = globals, -- to be filled } @@ -96,27 +104,33 @@ function texconfig.init() globals[k] = tostring(v) end - local function collect(t) + local function collect(t,fnc) local lib = { } for k, v in next, t do - local keys = { } - local gv = _G[v] - if type(gv) == "table" then - for k, v in next, gv do - keys[k] = tostring(v) -- 
true -- by tostring we cannot call overloades functions (security) + if fnc then + lib[v] = _G[v] + else + local keys = { } + local gv = _G[v] + local tv = type(gv) + if tv == "table" then + for k, v in next, gv do + keys[k] = tostring(v) -- true -- by tostring we cannot call overloades functions (security) + end end + lib[v] = keys + builtin[v] = keys end - lib[v] = keys - builtin[v] = keys end return lib end - libraries.basiclua = collect(libraries.basiclua) - libraries.basictex = collect(libraries.basictex) - libraries.extralua = collect(libraries.extralua) - libraries.extratex = collect(libraries.extratex) - libraries.obsolete = collect(libraries.obsolete) + libraries.basiclua = collect(libraries.basiclua) + libraries.basictex = collect(libraries.basictex) + libraries.extralua = collect(libraries.extralua) + libraries.extratex = collect(libraries.extratex) + libraries.functions = collect(libraries.functions,true) + libraries.obsolete = collect(libraries.obsolete) -- shortcut and helper @@ -150,7 +164,6 @@ end) -- done, from now on input and callbacks are internal ]] - local variablenames = { "error_line", "half_error_line", "expand_depth", "hash_extra", "nest_size", diff --git a/tex/context/base/luat-cod.mkiv b/tex/context/base/luat-cod.mkiv index beb631844..9ce6161c3 100644 --- a/tex/context/base/luat-cod.mkiv +++ b/tex/context/base/luat-cod.mkiv @@ -67,7 +67,7 @@ %D Reporting the version of \LUA\ that we use is done as follows: -\edef\luaversion{\ctxwrite{_VERSION}} % no context luaded yet +\edef\luaversion{\ctxwrite{_VERSION}} \def\registerctxluafile#1#2{\ctxlua{lua.registercode("#1","#2")}} \def\ctxloadluafile #1{\ctxlua{lua.registercode("#1")}} diff --git a/tex/context/base/luat-env.lua b/tex/context/base/luat-env.lua index 4d58897a0..7bce0052d 100644 --- a/tex/context/base/luat-env.lua +++ b/tex/context/base/luat-env.lua @@ -22,6 +22,10 @@ local format, sub, match, gsub, find = string.format, string.sub, string.match, local unquoted, quoted = 
string.unquoted, string.quoted local concat, insert, remove = table.concat, table.insert, table.remove local loadedluacode = utilities.lua.loadedluacode +local luasuffixes = utilities.lua.suffixes + +environment = environment or { } +local environment = environment -- precautions @@ -31,9 +35,29 @@ function os.setlocale() -- no way you can mess with it end --- dirty tricks +-- dirty tricks (we will replace the texlua call by luatex --luaonly) + +local validengines = allocate { + ["luatex"] = true, + ["luajittex"] = true, + -- ["luatex.exe"] = true, + -- ["luajittex.exe"] = true, +} + +local basicengines = allocate { + ["luatex"] = "luatex", + ["texlua"] = "luatex", + ["texluac"] = "luatex", + ["luajittex"] = "luajittex", + ["texluajit"] = "luajittex", + -- ["texlua.exe"] = "luatex", + -- ["texluajit.exe"] = "luajittex", +} -if arg and (arg[0] == 'luatex' or arg[0] == 'luatex.exe') and arg[1] == "--luaonly" then +environment.validengines = validengines +environment.basicengines = basicengines + +if arg and validengines[file.removesuffix(arg[0])] and arg[1] == "--luaonly" then arg[-1] = arg[0] arg[ 0] = arg[2] for k=3,#arg do @@ -65,9 +89,6 @@ end -- environment -environment = environment or { } -local environment = environment - environment.arguments = allocate() environment.files = allocate() environment.sortedflags = nil @@ -123,7 +144,7 @@ function environment.initializearguments(arg) end end end - environment.ownname = environment.ownname or arg[0] or 'unknown.lua' + environment.ownname = file.reslash(environment.ownname or arg[0] or 'unknown.lua') end function environment.setargument(name,value) @@ -204,22 +225,22 @@ function environment.reconstructcommandline(arg,noquote) end end ---~ -- to be tested: ---~ ---~ function environment.reconstructcommandline(arg,noquote) ---~ arg = arg or environment.originalarguments ---~ if noquote and #arg == 1 then ---~ return unquoted(resolvers.resolve(arg[1])) ---~ elseif #arg > 0 then ---~ local result = { } ---~ for 
i=1,#arg do ---~ result[#result+1] = format("%q",unquoted(resolvers.resolve(arg[i]))) -- always quote ---~ end ---~ return concat(result," ") ---~ else ---~ return "" ---~ end ---~ end +-- -- to be tested: +-- +-- function environment.reconstructcommandline(arg,noquote) +-- arg = arg or environment.originalarguments +-- if noquote and #arg == 1 then +-- return unquoted(resolvers.resolve(arg[1])) +-- elseif #arg > 0 then +-- local result = { } +-- for i=1,#arg do +-- result[#result+1] = format("%q",unquoted(resolvers.resolve(arg[i]))) -- always quote +-- end +-- return concat(result," ") +-- else +-- return "" +-- end +-- end if arg then @@ -314,9 +335,11 @@ function environment.loadluafile(filename, version) local lucname, luaname, chunk local basename = file.removesuffix(filename) if basename == filename then - lucname, luaname = basename .. ".luc", basename .. ".lua" + luaname = fiule.addsuffix(basename,luasuffixes.lua) + lucname = fiule.addsuffix(basename,luasuffixes.luc) else - lucname, luaname = nil, basename -- forced suffix + luaname = basename -- forced suffix + lucname = nil end -- when not overloaded by explicit suffix we look for a luc file first local fullname = (lucname and environment.luafile(lucname)) or "" diff --git a/tex/context/base/luat-exe.lua b/tex/context/base/luat-exe.lua index 0d5cb5abc..a57a5a006 100644 --- a/tex/context/base/luat-exe.lua +++ b/tex/context/base/luat-exe.lua @@ -10,6 +10,7 @@ if not modules then modules = { } end modules ['luat-exe'] = { local match, find, gmatch = string.match, string.find, string.gmatch local concat = table.concat +local select = select local report_executers = logs.reporter("system","executers") @@ -29,20 +30,20 @@ local spawn = osspawn local popen = iopopen local function register(...) - local t = { ... } - for k=1,#t do - local v = t[k] - permitted[#permitted+1] = (v == "*" and ".*") or v + for k=1,select("#",...) do + local v = select(k,...) 
+ permitted[#permitted+1] = v == "*" and ".*" or v end end local function prepare(...) -- todo: make more clever first split local t = { ... } + local n = #n local one = t[1] - if #t == 1 then + if n == 1 then if type(one) == 'table' then - return one, concat(t," ",2,#t) + return one, concat(t," ",2,n) else local name, arguments = match(one,"^(.-)%s+(.+)$") if name and arguments then @@ -52,7 +53,7 @@ local function prepare(...) end end else - return one, concat(t," ",2,#t) + return one, concat(t," ",2,n) end end diff --git a/tex/context/base/luat-fio.lua b/tex/context/base/luat-fio.lua index 8e7988c4e..0af9cb6fc 100644 --- a/tex/context/base/luat-fio.lua +++ b/tex/context/base/luat-fio.lua @@ -81,6 +81,9 @@ if not resolvers.instance then register('find_write_file' , function(id,name) return name end, true) register('find_format_file' , function(name) return name end, true) + register('find_cidmap_file' , function(name) return findbinfile(name,"cidmap") end, true) + -- register('read_cidmap_file' , function(file) return loadbinfile(file,"cidmap") end, true) + end end diff --git a/tex/context/base/luat-fmt.lua b/tex/context/base/luat-fmt.lua index 37b0f5166..2d2614ecb 100644 --- a/tex/context/base/luat-fmt.lua +++ b/tex/context/base/luat-fmt.lua @@ -8,13 +8,11 @@ if not modules then modules = { } end modules ['luat-fmt'] = { local format = string.format +local quoted = string.quoted +local luasuffixes = utilities.lua.suffixes local report_format = logs.reporter("resolvers","formats") --- helper for mtxrun - -local quoted = string.quoted - local function primaryflags() -- not yet ok local trackers = environment.argument("trackers") local directives = environment.argument("directives") @@ -29,13 +27,14 @@ local function primaryflags() -- not yet ok end function environment.make_format(name) + local engine = environment.ownmain or "luatex" -- change to format path (early as we need expanded paths) - local olddir = lfs.currentdir() - local path = 
caches.getwritablepath("formats") or "" -- maybe platform + local olddir = dir.current() + local path = caches.getwritablepath("formats",engine) or "" -- maybe platform if path ~= "" then lfs.chdir(path) end - report_format("format path: %s",lfs.currentdir()) + report_format("format path: %s",dir.current()) -- check source file local texsourcename = file.addsuffix(name,"mkiv") local fulltexsourcename = resolvers.findfile(texsourcename,"tex") or "" @@ -72,13 +71,12 @@ function environment.make_format(name) elseif type(usedlualibs) == "table" then report_format("using stub specification: %s",fullspecificationname) local texbasename = file.basename(name) - local luastubname = file.addsuffix(texbasename,"lua") - local lucstubname = file.addsuffix(texbasename,"luc") + local luastubname = file.addsuffix(texbasename,luasuffixes.lua) + local lucstubname = file.addsuffix(texbasename,luasuffixes.luc) -- pack libraries in stub report_format("creating initialization file: %s",luastubname) utilities.merger.selfcreate(usedlualibs,specificationpath,luastubname) -- compile stub file (does not save that much as we don't use this stub at startup any more) - local strip = resolvers.booleanvariable("LUACSTRIP", true) if utilities.lua.compile(luastubname,lucstubname) and lfs.isfile(lucstubname) then report_format("using compiled initialization file: %s",lucstubname) usedluastub = lucstubname @@ -92,7 +90,7 @@ function environment.make_format(name) return end -- generate format - local command = format("luatex --ini %s --lua=%s %s %sdump",primaryflags(),quoted(usedluastub),quoted(fulltexsourcename),os.platform == "unix" and "\\\\" or "\\") + local command = format("%s --ini %s --lua=%s %s %sdump",engine,primaryflags(),quoted(usedluastub),quoted(fulltexsourcename),os.platform == "unix" and "\\\\" or "\\") report_format("running command: %s\n",command) os.spawn(command) -- remove related mem files @@ -111,8 +109,9 @@ end function environment.run_format(name,data,more) if name and name ~= 
"" then + local engine = environment.ownmain or "luatex" local barename = file.removesuffix(name) - local fmtname = caches.getfirstreadablefile(file.addsuffix(barename,"fmt"),"formats") + local fmtname = caches.getfirstreadablefile(file.addsuffix(barename,"fmt"),"formats",engine) if fmtname == "" then fmtname = resolvers.findfile(file.addsuffix(barename,"fmt")) or "" end @@ -129,7 +128,7 @@ function environment.run_format(name,data,more) report_format("using format name: %s",fmtname) report_format("no luc/lua with name: %s",barename) else - local command = format("luatex %s --fmt=%s --lua=%s %s %s",primaryflags(),quoted(barename),quoted(luaname),quoted(data),more ~= "" and quoted(more) or "") + local command = format("%s %s --fmt=%s --lua=%s %s %s",engine,primaryflags(),quoted(barename),quoted(luaname),quoted(data),more ~= "" and quoted(more) or "") report_format("running command: %s",command) os.spawn(command) end diff --git a/tex/context/base/luat-ini.lua b/tex/context/base/luat-ini.lua index 0b8c5d951..587214b93 100644 --- a/tex/context/base/luat-ini.lua +++ b/tex/context/base/luat-ini.lua @@ -6,15 +6,16 @@ if not modules then modules = { } end modules ['luat-ini'] = { license = "see context related readme files" } --- rather experimental down here ... will change with lua 5.2 -- +-- rather experimental down here ... adapted to lua 5.2 ... but still +-- experimental local debug = require("debug") local string, table, lpeg, math, io, system = string, table, lpeg, math, io, system -local next, setfenv = next, setfenv or debug.setfenv +local rawset, rawget, next, setmetatable = rawset, rawget, next, setmetatable --[[ldx-- -

We cannot load anything yet. However what we will do us reserve a fewtables. +

We cannot load anything yet. However what we will do is reserve a few tables. These can be used for runtime user data or third party modules and will not be cluttered by macro package code.

--ldx]]-- @@ -38,115 +39,168 @@ thirddata['tricks' ] = { } --ldx]]-- --[[ldx-- -

We could cook up a readonly model for global tables but it -makes more sense to invite users to use one of the predefined -namespaces. One can redefine the protector. After all, it's -just a lightweight suggestive system, not a watertight -one.

+

We could cook up a readonly model for global tables but it makes more sense +to invite users to use one of the predefined namespaces. One can redefine the +protector. After all, it's just a lightweight suggestive system, not a +watertight one.

--ldx]]-- --- this will change when we move on to lua 5.2+ - -local global = _G - +local global = _G global.global = global ---~ rawset(global,"global",global) local dummy = function() end --- another approach is to freeze tables by using a metatable, this will be --- implemented stepwise - -local protected = { - -- global table - global = global, - -- user tables - -- moduledata = moduledata, - userdata = userdata, - thirddata = thirddata, - documentdata = documentdata, - -- reserved - protect = dummy, - unprotect = dummy, - -- luatex - tex = tex, - -- lua - string = string, - table = table, - lpeg = lpeg, - math = math, - io = io, - -- - -- maybe other l-*, xml etc -} +--[[ldx-- +

Another approach is to freeze tables by using a metatable, this will be +implemented stepwise.

+--ldx]]-- -- moduledata : no need for protection (only for developers) -- isolatedata : full protection -- userdata : protected -- thirddata : protected -userdata, thirddata = nil, nil - --- we could have a metatable that automaticaly creates a top level namespace +--[[ldx-- +

We could have a metatable that automatically creates a top level namespace.

+--ldx]]-- -if not setfenv then - texio.write_nl("warning: we need to fix setfenv by using 'load in' or '_ENV'") -end +local luanames = lua.name -- luatex itself + +lua.numbers = lua.numbers or { } local numbers = lua.numbers +lua.messages = lua.messages or { } local messages = lua.messages + +storage.register("lua/numbers", numbers, "lua.numbers" ) +storage.register("lua/messages", messages, "lua.messages") + +local setfenv = setfenv or debug.setfenv -- < 5.2 + +if setfenv then + + local protected = { + -- global table + global = global, + -- user tables + -- moduledata = moduledata, + userdata = userdata, + thirddata = thirddata, + documentdata = documentdata, + -- reserved + protect = dummy, + unprotect = dummy, + -- luatex + tex = tex, + -- lua + string = string, + table = table, + lpeg = lpeg, + math = math, + io = io, + file = file, + bit32 = bit32, + -- + context = context, + } + + local protect_full = function(name) + local t = { } + for k, v in next, protected do + t[k] = v + end + return t + end -local function protect_full(name) - local t = { } - for k, v in next, protected do - t[k] = v + local protect_part = function(name) -- adds + local t = rawget(global,name) + if not t then + t = { } + for k, v in next, protected do + t[k] = v + end + rawset(global,name,t) + end + return t end - return t -end -local function protect_part(name) ---~ local t = global[name] - local t = rawget(global,name) - if not t then - t = { } - for k, v in next, protected do - t[k] = v + protect = function(name) + if name == "isolateddata" then + setfenv(2,protect_full(name)) + else + setfenv(2,protect_part(name or "shareddata")) end ---~ global[name] = t - rawset(global,name,t) end - return t -end -function protect(name) - if name == "isolateddata" then - setfenv(2,protect_full(name)) - else - setfenv(2,protect_part(name or "shareddata")) + function lua.registername(name,message) + local lnn = lua.numbers[name] + if not lnn then + lnn = #messages + 1 + messages[lnn] = message + 
numbers[name] = lnn + end + luanames[lnn] = message + context(lnn) + -- initialize once + if name ~= "isolateddata" then + protect_full(name or "shareddata") + end end -end -lua.numbers = { } -lua.messages = { } +elseif libraries then -- assume >= 5.2 + + local shared + + protect = function(name) + if not shared then + -- e.g. context is not yet known + local public = { + global = global, + -- moduledata = moduledata, + userdata = userdata, + thirddata = thirddata, + documentdata = documentdata, + protect = dummy, + unprotect = dummy, + context = context, + } + -- + for k, v in next, libraries.builtin do public[k] = v end + for k, v in next, libraries.functions do public[k] = v end + for k, v in next, libraries.obsolete do public[k] = nil end + -- + shared = { __index = public } + protect = function(name) + local t = global[name] or { } + setmetatable(t,shared) -- set each time + return t + end + end + return protect(name) + end -function lua.registername(name,message) - local lnn = lua.numbers[name] - if not lnn then - lnn = #lua.messages + 1 - lua.messages[lnn] = message - lua.numbers[name] = lnn + function lua.registername(name,message) + local lnn = lua.numbers[name] + if not lnn then + lnn = #messages + 1 + messages[lnn] = message + numbers[name] = lnn + end + luanames[lnn] = message + context(lnn) end - lua.name[lnn] = message - context(lnn) - -- initialize once - if name ~= "isolateddata" then - protect_full(name or "shareddata") + +else + + protect = dummy + + function lua.registername(name,message) + local lnn = lua.numbers[name] + if not lnn then + lnn = #messages + 1 + messages[lnn] = message + numbers[name] = lnn + end + luanames[lnn] = message + context(lnn) end -end ---~ function lua.checknames() ---~ lua.name[0] = "ctx" ---~ for k, v in next, lua.messages do ---~ lua.name[k] = v ---~ end ---~ end +end -storage.register("lua/numbers", lua.numbers, "lua.numbers") -storage.register("lua/messages", lua.messages, "lua.messages") diff --git 
a/tex/context/base/luat-ini.mkiv b/tex/context/base/luat-ini.mkiv index 8ce5e80fd..a3a590311 100644 --- a/tex/context/base/luat-ini.mkiv +++ b/tex/context/base/luat-ini.mkiv @@ -27,14 +27,14 @@ %D A few more goodies: -\unexpanded\def\startlua {\luat_start_lua } \let\stoplua \relax % tex catcodes -\unexpanded\def\startluacode{\luat_start_lua_code} \let\stopluacode\relax % lua catcodes +\let\stoplua \relax % tex catcodes +\let\stopluacode\relax % lua catcodes % It might makes sense to have a \type {\directelua} so that we can avoid % the \type {\normalexpanded} around \type {\directlua}. Something to discuss % in the team. -\def\luat_start_lua +\unexpanded\def\startlua % \stoplua {\begingroup \obeylualines \luat_start_lua_indeed} @@ -42,7 +42,7 @@ \def\luat_start_lua_indeed#1\stoplua {\normalexpanded{\endgroup\noexpand\directlua{#1}}} % \zerocount is default -\def\luat_start_lua_code +\unexpanded\def\startluacode % \stopluacode {\begingroup \obeylualines \obeyluatokens @@ -62,42 +62,46 @@ \newtoks\everyluacode -\edef\lualetterbackslash{\string\\} -\edef\lualetterbar {\string\|} \edef\lualetterdash {\string\-} -\edef\lualetterlparent {\string\(} \edef\lualetterrparent {\string\)} -\edef\lualetterlbrace {\string\{} \edef\lualetterrbrace {\string\}} -\edef\lualettersquote {\string\'} \edef\lualetterdquote {\string\"} -\edef\lualettern {\string\n} \edef\lualetterr {\string\r} -\edef\lualetterf {\string\f} \edef\lualettert {\string\t} -\edef\lualettera {\string\a} \edef\lualetterb {\string\b} -\edef\lualetterv {\string\v} \edef\lualetters {\string\s} -\edef\lualetterone {\string\1} \edef\lualettertwo {\string\2} -\edef\lualetterthree {\string\3} \edef\lualetterfour {\string\4} -\edef\lualetterfive {\string\5} \edef\lualettersix {\string\6} -\edef\lualetterseven {\string\7} \edef\lualettereight {\string\8} -\edef\lualetternine {\string\9} \edef\lualetterzero {\string\0} +\edef\lua_letter_backslash{\string\\} +\edef\lua_letter_bar {\string\|} \edef\lua_letter_dash 
{\string\-} +\edef\lua_letter_lparent {\string\(} \edef\lua_letter_rparent {\string\)} +\edef\lua_letter_lbrace {\string\{} \edef\lua_letter_rbrace {\string\}} +\edef\lua_letter_squote {\string\'} \edef\lua_letter_dquote {\string\"} +\edef\lua_letter_n {\string\n} \edef\lua_letter_r {\string\r} +\edef\lua_letter_f {\string\f} \edef\lua_letter_t {\string\t} +\edef\lua_letter_a {\string\a} \edef\lua_letter_b {\string\b} +\edef\lua_letter_v {\string\v} \edef\lua_letter_s {\string\s} +\edef\lua_letter_one {\string\1} \edef\lua_letter_two {\string\2} +\edef\lua_letter_three {\string\3} \edef\lua_letter_four {\string\4} +\edef\lua_letter_five {\string\5} \edef\lua_letter_six {\string\6} +\edef\lua_letter_seven {\string\7} \edef\lua_letter_eight {\string\8} +\edef\lua_letter_nine {\string\9} \edef\lua_letter_zero {\string\0} \appendtoks - \let\\\lualetterbackslash - \let\|\lualetterbar \let\-\lualetterdash - \let\(\lualetterlparent \let\)\lualetterrparent - \let\{\lualetterlbrace \let\}\lualetterrbrace - \let\'\lualettersquote \let\"\lualetterdquote - \let\n\lualettern \let\r\lualetterr - \let\f\lualetterf \let\t\lualettert - \let\a\lualettera \let\b\lualetterb - \let\v\lualetterv \let\s\lualetters - \let\1\lualetterone \let\2\lualettertwo - \let\3\lualetterthree \let\4\lualetterfour - \let\5\lualetterfive \let\6\lualettersix - \let\7\lualetterseven \let\8\lualettereight - \let\9\lualetternine \let\0\lualetterzero + \let\\\lua_letter_backslash + \let\|\lua_letter_bar \let\-\lua_letter_dash + \let\(\lua_letter_lparent \let\)\lua_letter_rparent + \let\{\lua_letter_lbrace \let\}\lua_letter_rbrace + \let\'\lua_letter_squote \let\"\lua_letter_dquote + \let\n\lua_letter_n \let\r\lua_letter_r + \let\f\lua_letter_f \let\t\lua_letter_t + \let\a\lua_letter_a \let\b\lua_letter_b + \let\v\lua_letter_v \let\s\lua_letter_s + \let\1\lua_letter_one \let\2\lua_letter_two + \let\3\lua_letter_three \let\4\lua_letter_four + \let\5\lua_letter_five \let\6\lua_letter_six + 
\let\7\lua_letter_seven \let\8\lua_letter_eight + \let\9\lua_letter_nine \let\0\lua_letter_zero \to \everyluacode -\def\obeyluatokens +\unexpanded\def\obeyluatokens {\setcatcodetable\luacatcodes \the\everyluacode} + +\edef\luamajorversion{\ctxwrite{_MAJORVERSION}} +\edef\luaminorversion{\ctxwrite{_MINORVERSION}} + %D \macros %D {definenamedlua} %D @@ -110,19 +114,20 @@ \installcorenamespace{luacode} -\def\luat_start_named_lua_code#1% +\unexpanded\def\luat_start_named_lua_code#1% {\begingroup \obeylualines \obeyluatokens \csname\??luacode#1\endcsname} -\unexpanded\def\definenamedlua[#1]#2[#3]% no optional arg handling here yet +\unexpanded\def\definenamedlua[#1]#2[#3]% no optional arg handling here yet / we could use numbers instead (more efficient) {\ifcsname\??luacode#1\endcsname \else \scratchcounter\ctxlua{lua.registername("#1","#3")}% - \normalexpanded{\edef\csname\??luacode#1\endcsname##1\csname\e!stop#1\v!code\endcsname}% - {\endgroup\noexpand\directlua\the\scratchcounter{protect("#1\s!data")##1}}% - \expandafter\def \csname\e!start#1\v!code\endcsname {\luat_start_named_lua_code{#1}}% - \expandafter\edef\csname #1\v!code\endcsname##1{\noexpand\directlua\the\scratchcounter{protect("#1\s!data")##1}}% + \normalexpanded{\xdef\csname\??luacode#1\endcsname##1\csname\e!stop#1\v!code\endcsname}% + %{\endgroup\noexpand\directlua\the\scratchcounter{local _ENV=protect("#1\s!data")##1}}% + {\noexpand\normalexpanded{\endgroup\noexpand\directlua\the\scratchcounter{local _ENV=protect("#1\s!data")##1}}}% + \expandafter\edef\csname\e!start#1\v!code\endcsname {\luat_start_named_lua_code{#1}}% + \expandafter\edef\csname #1\v!code\endcsname##1{\noexpand\directlua\the\scratchcounter{local _ENV=protect("#1\s!data")##1}}% \fi} %D We predefine a few. 
@@ -142,12 +147,25 @@ %D %D \startusercode %D global.context("USER 1") +%D context.par() %D context("USER 2") +%D context.par() %D if characters then -%D context("ACCESS") +%D context("ACCESS directly") +%D elseif global.characters then +%D context("ACCESS via global") +%D else +%D context("NO ACCESS at all") +%D end +%D context.par() +%D if bogus then +%D context("ACCESS directly") +%D elseif global.bogus then +%D context("ACCESS via global") %D else -%D context("NO ACCESS") +%D context("NO ACCESS at all") %D end +%D context.par() %D \stopusercode %D \stopbuffer %D diff --git a/tex/context/base/lxml-ent.lua b/tex/context/base/lxml-ent.lua index be69dec00..14b758f1c 100644 --- a/tex/context/base/lxml-ent.lua +++ b/tex/context/base/lxml-ent.lua @@ -7,9 +7,8 @@ if not modules then modules = { } end modules ['lxml-ent'] = { } local type, next, tonumber = type, next, tonumber -local utf = unicode.utf8 local byte, format = string.byte, string.format -local utfupper, utfchar = utf.upper, utf.char +local utfchar = utf.char local lpegmatch = lpeg.match --[[ldx-- diff --git a/tex/context/base/lxml-inf.lua b/tex/context/base/lxml-inf.lua index 834d152fd..8f1157c7d 100644 --- a/tex/context/base/lxml-inf.lua +++ b/tex/context/base/lxml-inf.lua @@ -8,6 +8,8 @@ if not modules then modules = { } end modules ['lxml-inf'] = { -- This file will be loaded runtime by x-pending.tex. 
+local concat = table.concat + local xmlwithelements = xml.withelements local getid = lxml.getid diff --git a/tex/context/base/lxml-lpt.lua b/tex/context/base/lxml-lpt.lua index 0c10998a0..4e84930f1 100644 --- a/tex/context/base/lxml-lpt.lua +++ b/tex/context/base/lxml-lpt.lua @@ -10,7 +10,7 @@ if not modules then modules = { } end modules ['lxml-lpt'] = { -- todo: B/C/[get first match] local concat, remove, insert = table.concat, table.remove, table.insert -local type, next, tonumber, tostring, setmetatable, loadstring = type, next, tonumber, tostring, setmetatable, loadstring +local type, next, tonumber, tostring, setmetatable, load, select = type, next, tonumber, tostring, setmetatable, load, select local format, upper, lower, gmatch, gsub, find, rep = string.format, string.upper, string.lower, string.gmatch, string.gsub, string.find, string.rep local lpegmatch, lpegpatterns = lpeg.match, lpeg.patterns @@ -682,7 +682,7 @@ end local function register_expression(expression) local converted = lpegmatch(converter,expression) - local runner = loadstring(format(template_e,converted)) + local runner = load(format(template_e,converted)) runner = (runner and runner()) or function() errorrunner_e(expression,converted) end return { kind = "expression", expression = expression, converted = converted, evaluator = runner } end @@ -690,9 +690,9 @@ end local function register_finalizer(protocol,name,arguments) local runner if arguments and arguments ~= "" then - runner = loadstring(format(template_f_y,protocol or xml.defaultprotocol,name,arguments)) + runner = load(format(template_f_y,protocol or xml.defaultprotocol,name,arguments)) else - runner = loadstring(format(template_f_n,protocol or xml.defaultprotocol,name)) + runner = load(format(template_f_n,protocol or xml.defaultprotocol,name)) end runner = (runner and runner()) or function() errorrunner_f(name,arguments) end return { kind = "finalizer", name = name, arguments = arguments, finalizer = runner } @@ -1116,6 +1116,7 @@ 
end expressions.child = function(e,pattern) return applylpath(e,pattern) -- todo: cache end + expressions.count = function(e,pattern) -- what if pattern == empty or nil local collected = applylpath(e,pattern) -- todo: cache return pattern and (collected and #collected) or 0 @@ -1123,13 +1124,30 @@ end -- external -expressions.oneof = function(s,...) -- slow - local t = {...} for i=1,#t do if s == t[i] then return true end end return false +-- expressions.oneof = function(s,...) +-- local t = {...} +-- for i=1,#t do +-- if s == t[i] then +-- return true +-- end +-- end +-- return false +-- end + +expressions.oneof = function(s,...) + for i=1,select("#",...) do + if s == select(i,...) then + return true + end + end + return false end + expressions.error = function(str) xml.errorhandler(format("unknown function in lpath expression: %s",tostring(str or "?"))) return false end + expressions.undefined = function(s) return s == nil end diff --git a/tex/context/base/lxml-tab.lua b/tex/context/base/lxml-tab.lua index 21c7561e8..04b4192c0 100644 --- a/tex/context/base/lxml-tab.lua +++ b/tex/context/base/lxml-tab.lua @@ -39,7 +39,6 @@ local xml = xml --~ local xml = xml -local utf = unicode.utf8 local concat, remove, insert = table.concat, table.remove, table.insert local type, next, setmetatable, getmetatable, tonumber = type, next, setmetatable, getmetatable, tonumber local format, lower, find, match, gsub = string.format, string.lower, string.find, string.match, string.gsub diff --git a/tex/context/base/lxml-tex.lua b/tex/context/base/lxml-tex.lua index 6ff96bf05..8a57ed54e 100644 --- a/tex/context/base/lxml-tex.lua +++ b/tex/context/base/lxml-tex.lua @@ -10,12 +10,10 @@ if not modules then modules = { } end modules ['lxml-tex'] = { -- interface and not the context one. If we ever do that there will -- be an cldf-xml helper library. 
-local utf = unicode.utf8 - -local utfchar, utfupper = utf.char, utf.upper +local utfchar = utf.char local concat, insert, remove = table.concat, table.insert, table.remove local format, sub, gsub, find, gmatch, match = string.format, string.sub, string.gsub, string.find, string.gmatch, string.match -local type, next, tonumber, tostring = type, next, tonumber, tostring +local type, next, tonumber, tostring, select = type, next, tonumber, tostring, select local lpegmatch = lpeg.match local P, S, C, Cc = lpeg.P, lpeg.S, lpeg.C, lpeg.Cc @@ -125,7 +123,7 @@ function lxml.resolvedentity(str) end context(err) else - local tag = utfupper(str) + local tag = upperchars(str) if trace_entities then report_xml("passing entity '%s' to \\xmle using tag '%s'",str,tag) end @@ -841,10 +839,32 @@ function lxml.installsetup(what,document,setup,where) end end +-- function lxml.flushsetups(id,...) +-- local done, list = { }, { ... } +-- for i=1,#list do +-- local document = list[i] +-- local sd = setups[document] +-- if sd then +-- for k=1,#sd do +-- local v= sd[k] +-- if not done[v] then +-- if trace_loading then +-- report_lxml("applying setup %02i = %s to %s",k,v,document) +-- end +-- contextsprint(ctxcatcodes,"\\xmlsetup{",id,"}{",v,"}") +-- done[v] = true +-- end +-- end +-- elseif trace_loading then +-- report_lxml("no setups for %s",document) +-- end +-- end +-- end + function lxml.flushsetups(id,...) - local done, list = { }, { ... } - for i=1,#list do - local document = list[i] + local done = { } + for i=1,select("#",...) do + local document = select(i,...) 
local sd = setups[document] if sd then for k=1,#sd do diff --git a/tex/context/base/m-database.lua b/tex/context/base/m-database.lua index b9ec3aa36..caa40f8f6 100644 --- a/tex/context/base/m-database.lua +++ b/tex/context/base/m-database.lua @@ -20,12 +20,16 @@ local report_database = logs.reporter("database") buffers.database = buffers.database or { } +local l_tab = lpegpatterns.tab +local l_space = lpegpatterns.space +local l_comma = lpegpatterns.comma + local separators = { -- not interfaced - tab = lpegpatterns.tab, - tabs = lpegpatterns.tab^1, - comma = lpegpatterns.comma, - space = lpegpatterns.space, - spaces = lpegpatterns.space^1, + tab = l_tab, + tabs = l_tab^1, + comma = l_comma, + space = l_space, + spaces = l_space^1, } function buffers.database.process(settings) @@ -54,7 +58,7 @@ function buffers.database.process(settings) local quotedata = nil for chr in gmatch(quotechar,".") do local quotechar = lpegP(chr) - local quoteword = quotechar * lpeg.C((1 - quotechar)^0) * quotechar + local quoteword = l_space^0 * quotechar * lpegC((1 - quotechar)^0) * quotechar * l_space^0 if quotedata then quotedata = quotedata + quoteword else diff --git a/tex/context/base/m-morse.mkvi b/tex/context/base/m-morse.mkvi index 6082d588a..a2c20dff7 100644 --- a/tex/context/base/m-morse.mkvi +++ b/tex/context/base/m-morse.mkvi @@ -100,16 +100,22 @@ local codes = { morse.codes = codes -setmetatable(codes, { __index = function(t,k) +local fallbackself = false + +local function codefallback(t,k) if k then local u = ucchars[k] local v = rawget(t,u) or rawget(t,shchars[u]) or false t[k] = v return v + elseif fallbackself then + return k else return false end -end }) +end + +table.setmetatableindex(codes,codefallback) local MorseBetweenWords = context.MorseBetweenWords local MorseBetweenCharacters = context.MorseBetweenCharacters @@ -228,16 +234,19 @@ end \def\MorseSpace {\hskip7\dimexpr\MorseWidth\relax} \def\MorseUnknown #text{[\detokenize{#text}]} -\unexpanded\def\MorseCode 
#text{\ctxlua{moduledata.morse.tomorse(\!!bs#text\!!es,true)}} -\unexpanded\def\MorseString#text{\ctxlua{moduledata.morse.tomorse(\!!bs#text\!!es)}} -\unexpanded\def\MorseFile #text{\ctxlua{moduledata.morse.filetomorse("#text")}} -\unexpanded\def\MorseTable {\ctxlua{moduledata.morse.showtable()}} +\unexpanded\def\MorseCode #text{\ctxlua{moduledata.morse.tomorse(\!!bs#text\!!es,true)}} +\unexpanded\def\MorseString #text{\ctxlua{moduledata.morse.tomorse(\!!bs#text\!!es)}} +\unexpanded\def\MorseFile #text{\ctxlua{moduledata.morse.filetomorse("#text")}} +\unexpanded\def\MorseTable {\ctxlua{moduledata.morse.showtable()}} \let\Morse \MorseString +%def\MorseShort {·} +%def\MorseLong {—} + \protect -\continueifinputfile{m-morse.mkiv} +\continueifinputfile{m-morse.mkvi} \starttext diff --git a/tex/context/base/m-timing.mkiv b/tex/context/base/m-timing.mkiv index 18646cfae..5502768f6 100644 --- a/tex/context/base/m-timing.mkiv +++ b/tex/context/base/m-timing.mkiv @@ -11,7 +11,7 @@ %C therefore copyrighted by \PRAGMA. See mreadme.pdf for %C details. -\ifx\ShowNamedUsage\undefined \else \endinput \fi +\ifdefined\ShowNamedUsage \endinput \fi %D Written at the end of 2007, this module is dedicated to Taco. Reaching this %D point in \LUATEX\ was a non trivial effort. 
By visualizing a bit what happens @@ -55,12 +55,13 @@ end \stopnotmode \unexpanded\def\ShowNamedUsage#1#2#3% - {\setbox\scratchbox\vbox\bgroup\startMPcode + {\setbox\scratchbox\vbox\bgroup + \startMPcode begingroup ; save p, q, b, h, w ; path p, q, b ; numeric h, w ; p := \cldcontext{moduledata.progress.path("#1","#2")} ; % p := p shifted -llcorner p ; - if bbwidth(p) > 1 : + if bbwidth(p) > 0 : h := 100 ; w := 2 * h ; w := \the\textwidth-3pt ; % correct for pen p := p xstretched w ; @@ -79,7 +80,8 @@ end fi ; fi ; endgroup ; - \stopMPcode\egroup + \stopMPcode + \egroup \scratchdimen\wd\scratchbox \ifdim\scratchdimen>\zeropoint \startlinecorrection diff --git a/tex/context/base/math-ini.lua b/tex/context/base/math-ini.lua index c83bac309..46c366c3e 100644 --- a/tex/context/base/math-ini.lua +++ b/tex/context/base/math-ini.lua @@ -11,7 +11,8 @@ if not modules then modules = { } end modules ['math-ini'] = { -- -- isn't characters.data loaded already ... shortcut it here -local format, utfchar, utfbyte = string.format, utf.char, utf.byte +local format = string.format +local utfchar, utfbyte = utf.char, utf.byte local setmathcode, setdelcode = tex.setmathcode, tex.setdelcode local texattribute = tex.attribute local floor = math.floor diff --git a/tex/context/base/math-ini.mkiv b/tex/context/base/math-ini.mkiv index 3de4c808f..1e78ff492 100644 --- a/tex/context/base/math-ini.mkiv +++ b/tex/context/base/math-ini.mkiv @@ -1082,6 +1082,18 @@ \textface \fi} +\def\mathstylecommand#1#2#3% + {\ifcase\normalmathstyle + \expandafter#1\or + \expandafter#1\or + \expandafter#1\or + \expandafter#1\or + \expandafter#2\or + \expandafter#2\or + \expandafter#3\or + \expandafter#3\else + \expandafter#1\fi} + %D A plain inheritance: \def\mathpalette#1#2% diff --git a/tex/context/base/math-noa.lua b/tex/context/base/math-noa.lua index e9cf79590..3c67d26a2 100644 --- a/tex/context/base/math-noa.lua +++ b/tex/context/base/math-noa.lua @@ -18,8 +18,6 @@ if not modules then modules = { } end 
modules ['math-noa'] = { -- 20D6 -> 2190 -- 20D7 -> 2192 -local utf = unicode.utf8 - local utfchar, utfbyte = utf.char, utf.byte local format, rep = string.format, string.rep local concat = table.concat diff --git a/tex/context/base/meta-fun.lua b/tex/context/base/meta-fun.lua index 7594d0c78..78ee25baf 100644 --- a/tex/context/base/meta-fun.lua +++ b/tex/context/base/meta-fun.lua @@ -8,7 +8,7 @@ if not modules then modules = { } end modules ['meta-fun'] = { -- very experimental, actually a joke ... see metafun manual for usage -local format, loadstring, type = string.format, loadstring, type +local format, load, type = string.format, load, type local metapost = metapost @@ -39,7 +39,7 @@ function metafun.interpolate(f,b,e,s,c) local done = false context("(") for i=b,e,(e-b)/s do - local d = loadstring(format("return function(x) return %s end",f)) + local d = load(format("return function(x) return %s end",f)) if d then d = d() if done then diff --git a/tex/context/base/meta-imp-dum.mkiv b/tex/context/base/meta-imp-dum.mkiv index 83fe12f09..05d40b70b 100644 --- a/tex/context/base/meta-imp-dum.mkiv +++ b/tex/context/base/meta-imp-dum.mkiv @@ -120,6 +120,7 @@ \c!height=\figureheight, \c!frame=\v!off, \c!strut=\v!no, + \c!align={\v!middle,\v!lohi}, \c!background=figure:placeholder:graphic, \c!foregroundcolor=\s!white]% \doifelse{\externalfigureparameter\c!text}\v!yes diff --git a/tex/context/base/meta-ini.lua b/tex/context/base/meta-ini.lua index 8b6fd22a2..928048776 100644 --- a/tex/context/base/meta-ini.lua +++ b/tex/context/base/meta-ini.lua @@ -7,7 +7,9 @@ if not modules then modules = { } end modules ['meta-ini'] = { } local tonumber = tonumber -local format, gmatch, match, gsub = string.format, string.gmatch, string.match, string.gsub +local format = string.format +local lpegmatch, lpegpatterns = lpeg.match, lpeg.patterns +local P, Cs, R, S, C, Cc = lpeg.P, lpeg.Cs, lpeg.R, lpeg.S, lpeg.C, lpeg.Cc local context = context @@ -45,46 +47,94 @@ end local colorhash = 
attributes.list[attributes.private('color')] -local validdimen = lpeg.patterns.validdimen * lpeg.P(-1) - -local lpegmatch = lpeg.match local textype = tex.type local MPcolor = context.MPcolor +-- local validdimen = lpegpatterns.validdimen * P(-1) +-- +-- function commands.prepareMPvariable(v) -- slow but ok +-- if v == "" then +-- MPcolor("black") +-- else +-- local typ, var = match(v,"(.):(.*)") +-- if not typ then +-- -- parse +-- if colorhash[v] then +-- MPcolor(v) +-- elseif tonumber(v) then +-- context(v) +-- elseif lpegmatch(validdimen,v) then +-- return context("\\the\\dimexpr %s",v) +-- else +-- for s in gmatch(v,"\\([a-zA-Z]+)") do -- can have trailing space +-- local t = textype(s) +-- if t == "dimen" then +-- return context("\\the\\dimexpr %s",v) +-- elseif t == "count" then +-- return context("\\the\\numexpr %s",v) +-- end +-- end +-- context("\\number %s",v) -- 0.4 ... +-- end +-- elseif typ == "d" then -- to be documented +-- -- dimension +-- context("\\the\\dimexpr %s",var) +-- elseif typ == "n" then -- to be documented +-- -- number +-- context("\\the\\numexpr %s",var) +-- elseif typ == "s" then -- to be documented +-- -- string +-- context(var) +-- elseif typ == "c" then -- to be documented +-- -- color +-- MPcolor(var) +-- else +-- context(var) +-- end +-- end +-- end + +-- we can actually get the dimen/count values here + +local dimenorname = + lpegpatterns.validdimen / function(s) + context("\\the\\dimexpr %s",s) + end + + (C(lpegpatterns.float) + Cc(1)) * lpegpatterns.space^0 * P("\\") * C(lpegpatterns.letter^1) / function(f,s) + local t = textype(s) + if t == "dimen" then + context("\\the\\dimexpr %s\\%s",f,s) + elseif t == "count" then + context("\\the\\numexpr \\%s * %s\\relax",s,f) -- \scratchcounter is not permitted + end + end + +local splitter = lpeg.splitat(":",true) + function commands.prepareMPvariable(v) -- slow but ok if v == "" then MPcolor("black") else - local typ, var = match(v,"(.):(.*)") - if not typ then + local typ, var = 
lpegmatch(splitter,v) + if not var then -- parse if colorhash[v] then MPcolor(v) elseif tonumber(v) then context(v) - elseif lpegmatch(validdimen,v) then - return context("\\the\\dimexpr %s",v) - else - for s in gmatch(v,"\\(.-)") do - local t = textype(s) - if t == "dimen" then - return context("\\the\\dimexpr %s",v) - elseif t == "count" then - return context("\\the\\numexpr %s",v) - end - end - return context("\\number %s",v) -- 0.4 ... + elseif not lpegmatch(dimenorname,v) then + context("\\number %s",v) -- 0.4 ... end - elseif typ == "d" then + elseif typ == "d" then -- to be documented -- dimension context("\\the\\dimexpr %s",var) - elseif typ == "n" then + elseif typ == "n" then -- to be documented -- number context("\\the\\numexpr %s",var) - elseif typ == "s" then + elseif typ == "s" then -- to be documented -- string context(var) - elseif typ == "c" then + elseif typ == "c" then -- to be documented -- color MPcolor(var) else @@ -93,12 +143,19 @@ function commands.prepareMPvariable(v) -- slow but ok end end -function metapost.formatnumber(f,n) -- just lua format - f = gsub(f,"@(%d)","%%.%1") - f = gsub(f,"@","%%") - f = format(f,tonumber(n) or 0) - f = gsub(f,"e([%+%-%d]+)",function(s) - return format("\\times10^{%s}",tonumber(s) or s) -- strips leading zeros - end) - context.mathematics(f) +-- function metapost.formatnumber(f,n) -- just lua format +-- f = gsub(f,"@(%d)","%%.%1") +-- f = gsub(f,"@","%%") +-- f = format(f,tonumber(n) or 0) +-- f = gsub(f,"e([%+%-%d]+)",function(s) +-- return format("\\times10^{%s}",tonumber(s) or s) -- strips leading zeros +-- end) +-- context.mathematics(f) +-- end + +local one = Cs((P("@")/"%%." 
* (R("09")^1) + P("@")/"%%" + 1)^0) +local two = Cs((P("e")/"" * ((S("+-")^0 * R("09")^1)/function(s) return format("\\times10^{%s}",tonumber(s) or s) end) + 1)^1) + +function metapost.formatnumber(fmt,n) -- just lua format + context.mathematics(lpegmatch(two,format(lpegmatch(one,fmt),n))) end diff --git a/tex/context/base/meta-ini.mkiv b/tex/context/base/meta-ini.mkiv index e731d26e0..5ac7908d5 100644 --- a/tex/context/base/meta-ini.mkiv +++ b/tex/context/base/meta-ini.mkiv @@ -395,15 +395,10 @@ %D \setupMPvariables[meta:button][size=20pt] %D \stoptyping -% \startlines -% \def\xxx{\lineheight} \doprepareMPvariable{xxx} \xxx -% \def\xxx{2pt} \doprepareMPvariable{xxx} \xxx -% \def\xxx{2} \doprepareMPvariable{xxx} \xxx -% \def\xxx{\scratchcounter} \doprepareMPvariable{xxx} \xxx -% \def\xxx{red} \doprepareMPvariable{xxx} \xxx -% \def\xxx{0.4} \doprepareMPvariable{xxx} \xxx -% \stoplines - +% \lineheight 2pt 2 \scratchcounter red 0.4 .5\bodyfontsize +% +% see cont-loc for test code +% % currently the inheritance of backgrounds does not work and % we might drop it anyway (too messy) diff --git a/tex/context/base/meta-pag.mkiv b/tex/context/base/meta-pag.mkiv index ef53d4753..7124902e7 100644 --- a/tex/context/base/meta-pag.mkiv +++ b/tex/context/base/meta-pag.mkiv @@ -38,6 +38,8 @@ % todo: ColumnDistance \stopMPinitializations +% maybe always set as frozen anyway + \startMPinitializations def LoadPageState = OnRightPage:=\MPonrightpage; diff --git a/tex/context/base/meta-pdf.lua b/tex/context/base/meta-pdf.lua index c68350a96..307779b16 100644 --- a/tex/context/base/meta-pdf.lua +++ b/tex/context/base/meta-pdf.lua @@ -195,6 +195,7 @@ end function mps.setdash(...) -- can be made faster, operate on t = { ... } local n = select("#",...) mpscode("[" .. concat({...}," ",1,n-1) .. "] " .. select(n,...) .. " d") + -- mpscode("[" .. concat({select(1,n-1)}," ") .. "] " .. select(n,...) .. 
" d") end function mps.resetdash() diff --git a/tex/context/base/mlib-pdf.lua b/tex/context/base/mlib-pdf.lua index 88f26e755..e22e3af30 100644 --- a/tex/context/base/mlib-pdf.lua +++ b/tex/context/base/mlib-pdf.lua @@ -6,6 +6,8 @@ if not modules then modules = { } end modules ['mlib-pdf'] = { license = "see context related readme files", } +-- maybe %s is better than %f + local format, concat, gsub = string.format, table.concat, string.gsub local abs, sqrt, round = math.abs, math.sqrt, math.round local setmetatable = setmetatable @@ -356,7 +358,7 @@ function metapost.flush(result,flusher,askedfig) end local dl = object.dash if dl then - local d = format("[%s] %i d",concat(dl.dashes or {}," "),dl.offset) + local d = format("[%s] %f d",concat(dl.dashes or {}," "),dl.offset) if d ~= dashed then dashed = d t[#t+1] = dashed diff --git a/tex/context/base/mlib-run.lua b/tex/context/base/mlib-run.lua index 5e702245d..6a23fe316 100644 --- a/tex/context/base/mlib-run.lua +++ b/tex/context/base/mlib-run.lua @@ -243,7 +243,7 @@ if mplibone then -- mpsformat = mpsformat .. "-" .. 
mpsbase -- end -- mpsformat = file.addsuffix(mpsformat, "mem") - -- local mpsformatfullname = caches.getfirstreadablefile(mpsformat,"formats") or "" + -- local mpsformatfullname = caches.getfirstreadablefile(mpsformat,"formats","metapost") or "" -- if mpsformatfullname ~= "" then -- report_metapost("loading '%s' from '%s'", mpsinput, mpsformatfullname) -- local mpx, result = metapost.load(mpsformatfullname) diff --git a/tex/context/base/mtx-context-copy.tex b/tex/context/base/mtx-context-copy.tex new file mode 100644 index 000000000..b798b3f8d --- /dev/null +++ b/tex/context/base/mtx-context-copy.tex @@ -0,0 +1,151 @@ +% engine=luatex + +%D \module +%D [ file=mtx-context-copy, +%D version=2008.11.10, % about that time i started playing with this +%D title=\CONTEXT\ Extra Trickery, +%D subtitle=Copying Files, +%D author=Hans Hagen, +%D date=\currentdate, +%D copyright={PRAGMA ADE \& \CONTEXT\ Development Team}] +%C +%C This module is part of the \CONTEXT\ macro||package and is +%C therefore copyrighted by \PRAGMA. See mreadme.pdf for +%C details. + +%D This is a \TEXEXEC\ feature that has been moved to \MKIV. 
+ +% begin help +% +% usage: context --extra=copy [options] list-of-files +% +% --interaction : add hyperlinks +% +% end help + +\input mtx-context-common.tex + +\doif {\getdocumentargument{interaction}} {yes} { + \setupinteraction + [state=start] + \setupexternalfigures + [interaction=yes] +} + +\setupexternalfigures + [directory=] + +\definepapersize + [fit] + [width=\figurewidth, + height=\figureheight] + +\setuplayout + [page] + +% this will be an option to \copypages + +\unexpanded\def\CopyPages[#1]% + {\dorecurse + {\noffigurepages} + {\startTEXpage + \externalfigure[#1][page=\recurselevel]% + \stopTEXpage}} + +\starttext + +\startluacode + + if #document.files == 0 then + context("no files given") + else + for _, filename in ipairs(document.files) do + if not string.find(filename,"^mtx%-context%-") then + logs.report("copy",filename) + context.getfiguredimensions + { filename } + context.setuppapersize + { "fit" } + context.CopyPages + { filename } + end + end + end + +\stopluacode + +\stoptext + + +% def copyoutput +% copyortrim(false,'copy') +% end + +% def trimoutput +% copyortrim(true,'trim') +% end + +% def copyortrim(trim=false,what='unknown') +% if job = TEX.new(logger) then +% prepare(job) +% job.cleanuptemprunfiles +% files = if @commandline.option('sort') then @commandline.arguments.sort else @commandline.arguments end +% if files.length > 0 then +% if f = File.open(job.tempfilename('tex'),'w') then +% scale = @commandline.checkedoption('scale') +% begin +% scale = (scale.to_f * 1000.0).to_i if scale.to_i < 10 +% rescue +% scale = 1000 +% end +% scale = scale.to_i +% paperoffset = @commandline.checkedoption('paperoffset', '0cm') +% f << "\\starttext\n" +% files.each do |filename| +% result = @commandline.checkedoption('result','texexec') +% begin +% if (filename !~ /^texexec/io) && (filename !~ /^#{result}/) then +% report("copying file: #{filename}") +% f << "\\getfiguredimensions\n" +% f << " [#{filename}]\n" +% f << " [scale=#{scale},\n" +% f << 
" page=1,\n" +% f << " size=trimbox\n" if trim +% f << "]\n" +% f << "\\definepapersize\n" +% f << " [copy]\n" +% f << " [width=\\figurewidth,\n" +% f << " height=\\figureheight]\n" +% f << "\\setuppapersize\n" +% f << " [copy][copy]\n" +% f << "\\setuplayout\n" +% f << " [page]\n" +% f << "\\setupexternalfigures\n" +% f << " [directory=]\n" +% f << "\\copypages\n" +% f << " [#{filename}]\n" +% f << " [scale=#{scale},\n" +% f << " marking=on,\n" if @commandline.option('markings') +% f << " size=trimbox,\n" if trim +% f << " offset=#{paperoffset}]\n" +% end +% rescue +% report("wrong specification") +% end +% end +% f << "\\stoptext\n" +% f.close +% job.setvariable('interface','english') +% job.setvariable('simplerun',true) +% # job.setvariable('nooptionfile',true) +% job.setvariable('files',[job.tempfilename]) +% job.processtex +% else +% report("no files to #{what}") +% end +% else +% report("no files to #{what}") +% end +% job.cleanuptemprunfiles +% end +% end diff --git a/tex/context/base/mtx-context-select.tex b/tex/context/base/mtx-context-select.tex index 7d6535119..2f63f5955 100644 --- a/tex/context/base/mtx-context-select.tex +++ b/tex/context/base/mtx-context-select.tex @@ -4,7 +4,7 @@ %D [ file=mtx-context-select, %D version=2008.11.10, % about that time i started playing with this %D title=\CONTEXT\ Extra Trickry, -%D subtitle=Listing Files, +%D subtitle=Selecting Files, %D author=Hans Hagen, %D date=\currentdate, %D copyright={PRAGMA ADE \& \CONTEXT\ Development Team}] @@ -94,6 +94,7 @@ end for _, filename in ipairs(document.files) do if not string.find(filename,"^mtx%-context%-") then + logs.report("select",filename) context.filterpages ( { filename }, { selection }, diff --git a/tex/context/base/mtx-context-timing.tex b/tex/context/base/mtx-context-timing.tex index 2fd9cb807..c545c768f 100644 --- a/tex/context/base/mtx-context-timing.tex +++ b/tex/context/base/mtx-context-timing.tex @@ -35,9 +35,13 @@ \starttext - \doifsomething 
{\getdocumentfilename{1}} { + + \doifsomethingelse {\getdocumentfilename{1}} { + \writestatus{system}{processing timings of \getdocumentfilename{1}} \LoadUsage{\getdocumentfilename{1}-luatex-progress} \ShowUsage{\getdocumentfilename{1}-luatex-progress} + } { + \writestatus{system}{no filename given for timings} } \stoptext diff --git a/tex/context/base/mult-de.mkii b/tex/context/base/mult-de.mkii index ce34b6a5b..99a5944b0 100644 --- a/tex/context/base/mult-de.mkii +++ b/tex/context/base/mult-de.mkii @@ -1699,6 +1699,7 @@ \setinterfacecommand{switchtorawfont}{switchtorawfont} \setinterfacecommand{sym}{sym} \setinterfacecommand{symbol}{symbol} +\setinterfacecommand{symoffset}{symoffset} \setinterfacecommand{synchronizationbar}{synchronisationsbalken} \setinterfacecommand{synchronize}{synchronisieren} \setinterfacecommand{tab}{tab} diff --git a/tex/context/base/mult-def.lua b/tex/context/base/mult-def.lua index 26b2b8529..9c7b4f767 100644 --- a/tex/context/base/mult-def.lua +++ b/tex/context/base/mult-def.lua @@ -5889,6 +5889,10 @@ return { ["pe"]="نم", ["ro"]="sim", }, + ["symoffset"]={ + ["en"]="symoffset", + ["nl"]="symoffset", + }, ["symbol"]={ ["cs"]="symbol", ["de"]="symbol", diff --git a/tex/context/base/mult-en.mkii b/tex/context/base/mult-en.mkii index 860060247..24bf6c074 100644 --- a/tex/context/base/mult-en.mkii +++ b/tex/context/base/mult-en.mkii @@ -1699,6 +1699,7 @@ \setinterfacecommand{switchtorawfont}{switchtorawfont} \setinterfacecommand{sym}{sym} \setinterfacecommand{symbol}{symbol} +\setinterfacecommand{symoffset}{symoffset} \setinterfacecommand{synchronizationbar}{synchronizationbar} \setinterfacecommand{synchronize}{synchronize} \setinterfacecommand{tab}{tab} diff --git a/tex/context/base/mult-fr.mkii b/tex/context/base/mult-fr.mkii index f52a761d2..0640cbecb 100644 --- a/tex/context/base/mult-fr.mkii +++ b/tex/context/base/mult-fr.mkii @@ -1699,6 +1699,7 @@ \setinterfacecommand{switchtorawfont}{changepolicebrute} \setinterfacecommand{sym}{sym} 
\setinterfacecommand{symbol}{symbole} +\setinterfacecommand{symoffset}{symoffset} \setinterfacecommand{synchronizationbar}{barresynchronisation} \setinterfacecommand{synchronize}{synchronise} \setinterfacecommand{tab}{tab} diff --git a/tex/context/base/mult-it.mkii b/tex/context/base/mult-it.mkii index b91e3ee5b..f4d02ad46 100644 --- a/tex/context/base/mult-it.mkii +++ b/tex/context/base/mult-it.mkii @@ -1699,6 +1699,7 @@ \setinterfacecommand{switchtorawfont}{passaafontgrezzo} \setinterfacecommand{sym}{sim} \setinterfacecommand{symbol}{simbolo} +\setinterfacecommand{symoffset}{symoffset} \setinterfacecommand{synchronizationbar}{barrasincronizzazione} \setinterfacecommand{synchronize}{sincronizza} \setinterfacecommand{tab}{tab} diff --git a/tex/context/base/mult-low.lua b/tex/context/base/mult-low.lua index 8338d41eb..287e2c9c1 100644 --- a/tex/context/base/mult-low.lua +++ b/tex/context/base/mult-low.lua @@ -114,6 +114,8 @@ return { "constantnumber", "constantnumberargument", "constantdimen", "constantdimenargument", "constantemptyargument", -- "continueifinputfile", + -- + "luastringsep", "!!bs", "!!es", }, ["helpers"] = { -- @@ -215,7 +217,9 @@ return { -- "removetoks", "appendtoks", "prependtoks", "appendtotoks", "prependtotoks", "to", -- - "endgraf", "endpar", "everyendpar", "reseteverypar", "finishpar", "empty", "null", "space", "quad", "enspace", "obeyspaces", "obeylines", "normalspace", + "endgraf", "endpar", "everyendpar", "reseteverypar", "finishpar", "empty", "null", "space", "quad", "enspace", + "obeyspaces", "obeylines", "obeyedspace", "obeyedline", + "normalspace", -- "executeifdefined", -- @@ -227,6 +231,8 @@ return { -- "setmeasure", "setemeasure", "setgmeasure", "setxmeasure", "definemeasure", "freezemeasure", "measure", -- + "installcorenamespace", + -- "getvalue", "setvalue", "setevalue", "setgvalue", "setxvalue", "letvalue", "letgvalue", "resetvalue", "undefinevalue", "ignorevalue", "setuvalue", "setuevalue", "setugvalue", "setuxvalue", @@ -306,6 
+312,18 @@ return { "startimath", "stopimath", "normalstartimath", "normalstopimath", "startdmath", "stopdmath", "normalstartdmath", "normalstopdmath", -- - "uncramped", "cramped", "triggermathstyle", "mathstylefont", "mathstyleface", + "uncramped", "cramped", "triggermathstyle", "mathstylefont", "mathstyleface", "mathstylecommand", + -- + "luaexpr", "expdoifelse", "expdoif", "expdoifnot", "expdoifcommonelse", "expdoifinsetelse", + -- + "ctxdirectlua", "ctxlatelua", "ctxsprint", "ctxwrite", "ctxcommand", "ctxdirectcommand", "ctxlatecommand", "ctxreport", + "ctxlua", "luacode", "lateluacode", "directluacode", + "registerctxluafile", "ctxloadluafile", + "luaversion", "luamajorversion", "luaminorversion", + "ctxluacode", "luaconditional", "luaexpanded", + "startluaparameterset", "stopluaparameterset", "luaparameterset", + "definenamedlua", + "obeylualines", "obeyluatokens", + "startluacode", "stopluacode", "startlua", "stoplua", } } diff --git a/tex/context/base/mult-nl.mkii b/tex/context/base/mult-nl.mkii index dbce41d85..9247f7ebf 100644 --- a/tex/context/base/mult-nl.mkii +++ b/tex/context/base/mult-nl.mkii @@ -1699,6 +1699,7 @@ \setinterfacecommand{switchtorawfont}{switchtorawfont} \setinterfacecommand{sym}{sym} \setinterfacecommand{symbol}{symbool} +\setinterfacecommand{symoffset}{symoffset} \setinterfacecommand{synchronizationbar}{synchronisatiebalk} \setinterfacecommand{synchronize}{synchroniseer} \setinterfacecommand{tab}{tab} diff --git a/tex/context/base/mult-pe.mkii b/tex/context/base/mult-pe.mkii index 58d3ff527..82f7abcd9 100644 --- a/tex/context/base/mult-pe.mkii +++ b/tex/context/base/mult-pe.mkii @@ -1699,6 +1699,7 @@ \setinterfacecommand{switchtorawfont}{تغییربه‌قلم‌خام} \setinterfacecommand{sym}{نم} \setinterfacecommand{symbol}{نماد} +\setinterfacecommand{symoffset}{symoffset} \setinterfacecommand{synchronizationbar}{میله‌تطابق} \setinterfacecommand{synchronize}{تطابق} \setinterfacecommand{tab}{تب} diff --git a/tex/context/base/mult-ro.mkii 
b/tex/context/base/mult-ro.mkii index adc811188..01ffe9fbe 100644 --- a/tex/context/base/mult-ro.mkii +++ b/tex/context/base/mult-ro.mkii @@ -1699,6 +1699,7 @@ \setinterfacecommand{switchtorawfont}{trecilafontraw} \setinterfacecommand{sym}{sim} \setinterfacecommand{symbol}{simbol} +\setinterfacecommand{symoffset}{symoffset} \setinterfacecommand{synchronizationbar}{barasincronizare} \setinterfacecommand{synchronize}{sincronizeaza} \setinterfacecommand{tab}{tab} diff --git a/tex/context/base/node-aux.lua b/tex/context/base/node-aux.lua index 03a8789f7..466b7991d 100644 --- a/tex/context/base/node-aux.lua +++ b/tex/context/base/node-aux.lua @@ -12,7 +12,7 @@ local type, tostring = type, tostring local nodes, node = nodes, node -local utfvalues = string.utfvalues +local utfvalues = utf.values local nodecodes = nodes.nodecodes diff --git a/tex/context/base/node-fnt.lua b/tex/context/base/node-fnt.lua index 66cd9916f..543f64acb 100644 --- a/tex/context/base/node-fnt.lua +++ b/tex/context/base/node-fnt.lua @@ -219,5 +219,166 @@ function handlers.characters(head) return head, true end +-- function handlers.xcharacters(head) +-- -- either next or not, but definitely no already processed list +-- starttiming(nodes) +-- local usedfonts, attrfonts, done = { }, { }, false +-- local a, u, prevfont, prevattr = 0, 0, nil, nil +-- local ap = 0 +-- if trace_fontrun then +-- run = run + 1 +-- report_fonts() +-- report_fonts("checking node list, run %s",run) +-- report_fonts() +-- local n = head +-- while n do +-- if n.id == glyph_code then +-- local font = n.font +-- local attr = has_attribute(n,0) or 0 +-- report_fonts("font %03i, dynamic %03i, glyph %s",font,attr,utf.char(n.char)) +-- else +-- report_fonts("[%s]",nodecodes[n.id]) +-- end +-- n = n.next +-- end +-- end +-- for n in traverse_id(glyph_code,head) do +-- -- if n.subtype<256 then +-- local font = n.font +-- local attr = n.attr +-- if attr ~= prevattr then +-- local an = has_attribute(n,0) or 0 +-- if ap ~= an and an > 0 
then +-- local used = attrfonts[font] +-- if not used then +-- used = { } +-- attrfonts[font] = used +-- end +-- if not used[an] then +-- local sd = setfontdynamics[font] +-- if sd then -- always true ? +-- local d = sd(font,an) -- can we cache this one? +-- if d then +-- used[an] = d +-- a = a + 1 +-- else +-- -- can't happen ... otherwise best use nil/false distinction +-- end +-- end +-- end +-- elseif font ~= prevfont then +-- local used = usedfonts[font] +-- if not used then +-- local fp = fontprocesses[font] +-- if fp then +-- usedfonts[font] = fp +-- u = u + 1 +-- else +-- -- can't happen ... otherwise best use nil/false distinction +-- end +-- end +-- end +-- prevfont = font +-- prevattr = attr +-- ap = an +-- elseif font ~= prevfont then +-- local used = usedfonts[font] +-- if not used then +-- local fp = fontprocesses[font] +-- if fp then +-- usedfonts[font] = fp +-- u = u + 1 +-- else +-- -- can't happen ... otherwise best use nil/false distinction +-- end +-- end +-- prevfont = font +-- end +-- -- end +-- end +-- if trace_fontrun then +-- report_fonts() +-- report_fonts("statics : %s",(u > 0 and concat(keys(usedfonts)," ")) or "none") +-- report_fonts("dynamics: %s",(a > 0 and concat(keys(attrfonts)," ")) or "none") +-- report_fonts() +-- end +-- -- we could combine these and just make the attribute nil +-- if u == 1 then +-- local font, processors = next(usedfonts) +-- local n = #processors +-- if n > 0 then +-- local h, d = processors[1](head,font,0) +-- head = h or head +-- done = done or d +-- if n > 1 then +-- for i=2,n do +-- local h, d = processors[i](head,font,0) +-- head = h or head +-- done = done or d +-- end +-- end +-- end +-- elseif u > 0 then +-- for font, processors in next, usedfonts do +-- local n = #processors +-- local h, d = processors[1](head,font,0) +-- head = h or head +-- done = done or d +-- if n > 1 then +-- for i=2,n do +-- local h, d = processors[i](head,font,0) +-- head = h or head +-- done = done or d +-- end +-- end +-- 
end +-- end +-- if a == 1 then +-- local font, dynamics = next(attrfonts) +-- for attribute, processors in next, dynamics do -- attr can switch in between +-- local n = #processors +-- if n == 0 then +-- report_fonts("no processors associated with dynamic %s",attribute) +-- else +-- local h, d = processors[1](head,font,attribute) +-- head = h or head +-- done = done or d +-- if n > 1 then +-- for i=2,n do +-- local h, d = processors[i](head,font,attribute) +-- head = h or head +-- done = done or d +-- end +-- end +-- end +-- end +-- elseif a > 0 then +-- for font, dynamics in next, attrfonts do +-- for attribute, processors in next, dynamics do -- attr can switch in between +-- local n = #processors +-- if n == 0 then +-- report_fonts("no processors associated with dynamic %s",attribute) +-- else +-- local h, d = processors[1](head,font,attribute) +-- head = h or head +-- done = done or d +-- if n > 1 then +-- for i=2,n do +-- local h, d = processors[i](head,font,attribute) +-- head = h or head +-- done = done or d +-- end +-- end +-- end +-- end +-- end +-- end +-- stoptiming(nodes) +-- if trace_characters then +-- nodes.report(head,done) +-- end +-- return head, true +-- end + handlers.protectglyphs = node.protect_glyphs handlers.unprotectglyphs = node.unprotect_glyphs diff --git a/tex/context/base/node-ini.lua b/tex/context/base/node-ini.lua index e654d9e0f..a294643b2 100644 --- a/tex/context/base/node-ini.lua +++ b/tex/context/base/node-ini.lua @@ -13,7 +13,6 @@ modules.

-- this module is being reconstructed -local utf = unicode.utf8 local next, type = next, type local format, match, gsub = string.format, string.match, string.gsub local concat, remove = table.concat, table.remove diff --git a/tex/context/base/node-inj.lua b/tex/context/base/node-inj.lua index 4e0824fe9..33eaa6eb1 100644 --- a/tex/context/base/node-inj.lua +++ b/tex/context/base/node-inj.lua @@ -9,7 +9,7 @@ if not modules then modules = { } end modules ['node-inj'] = { -- This is very experimental (this will change when we have luatex > .50 and -- a few pending thingies are available. Also, Idris needs to make a few more -- test fonts. Btw, future versions of luatex will have extended glyph properties --- that can be of help. +-- that can be of help. Some optimizations can go away when we have faster machines. local next = next diff --git a/tex/context/base/node-pro.lua b/tex/context/base/node-pro.lua index d30ad49a9..42c6a8543 100644 --- a/tex/context/base/node-pro.lua +++ b/tex/context/base/node-pro.lua @@ -6,7 +6,6 @@ if not modules then modules = { } end modules ['node-pro'] = { license = "see context related readme files" } -local utf = unicode.utf8 local utfchar = utf.char local format, concat = string.format, table.concat diff --git a/tex/context/base/node-typ.lua b/tex/context/base/node-typ.lua index 754b398c4..6e1a31643 100644 --- a/tex/context/base/node-typ.lua +++ b/tex/context/base/node-typ.lua @@ -6,7 +6,7 @@ if not modules then modules = { } end modules ['node-typ'] = { license = "see context related readme files" } -local utfvalues = string.utfvalues +local utfvalues = utf.values local currentfont = font.current local fontparameters = fonts.hashes.parameters diff --git a/tex/context/base/phys-dim.mkiv b/tex/context/base/phys-dim.mkiv index 934985399..910ab9524 100644 --- a/tex/context/base/phys-dim.mkiv +++ b/tex/context/base/phys-dim.mkiv @@ -407,6 +407,10 @@ \newtoks \everyunits % we keep the old \units command so we need a longer one +\appendtoks + 
\disablemathpunctuation +\to \everyunits + \appendtoks \setuevalue\currentunit{\phys_units_direct{\currentunit}} \to \everydefineunit diff --git a/tex/context/base/s-abr-01.tex b/tex/context/base/s-abr-01.tex index 500caa4b3..019a7b2fb 100644 --- a/tex/context/base/s-abr-01.tex +++ b/tex/context/base/s-abr-01.tex @@ -53,6 +53,7 @@ \logo [CCODE] {C} \logo [CPLUSPLUS] {C\high{++}} \logo [CALCMATH] {CalcMath} +\logo [CLD] {cld} \logo [CD] {cd} \logo [CPU] {cpu} \logo [CDROM] {cdrom} diff --git a/tex/context/base/s-abr-04.tex b/tex/context/base/s-abr-04.tex index ab3dc94cd..bcc2c8265 100644 --- a/tex/context/base/s-abr-04.tex +++ b/tex/context/base/s-abr-04.tex @@ -46,6 +46,7 @@ \logo [CCODE] {c} \logo [CALCMATH] {CalcMath} \logo [CD] {cd} +\logo [CLD] {cld} \logo [CPU] {cpu} \logo [CDROM] {cdrom} \logo [CID] {cid} diff --git a/tex/context/base/s-inf-03.mkiv b/tex/context/base/s-inf-03.mkiv index 0f4f82543..822173d00 100644 --- a/tex/context/base/s-inf-03.mkiv +++ b/tex/context/base/s-inf-03.mkiv @@ -4,11 +4,11 @@ % There is a loop somewhere ... todo. 
% \stoptext -\enablemode[ipad] % whatever that means +\enablemode[tablet] % whatever that means \setupbodyfont[dejavu] -\doifmodeelse {ipad} { +\doifmodeelse {tablet} { \setuppapersize [S6,landscape] @@ -90,21 +90,21 @@ local extralua = libraries.extralua local obsolete = libraries.obsolete local find = string.find -local color, goto = context.color, context.goto +local color, gotolocation = context.color, context["goto"] for k, v in table.sortedpairs(_G) do if obsolete[k] or find(k,"_") or k == "arg" or k == "utf" then -- elseif basiclua[k] then - goto(function() color( { "darkred" }, k) end, { k } ) + gotolocation(function() color( { "darkred" }, k) end, { k } ) elseif extralua[k] then - goto(function() color( { "darkgreen" }, k) end, { k } ) + gotolocation(function() color( { "darkgreen" }, k) end, { k } ) elseif basictex[k] then - goto(function() color( { "darkblue" }, k) end, { k } ) + gotolocation(function() color( { "darkblue" }, k) end, { k } ) elseif extratex[k] then - goto(function() color( { "darkyellow" }, k) end, { k } ) + gotolocation(function() color( { "darkyellow" }, k) end, { k } ) elseif type(v) == "table" then - goto(function() color( { "white" }, k) end, { k } ) + gotolocation(function() color( { "white" }, k) end, { k } ) end context(" ") end @@ -156,16 +156,19 @@ local byte = string.byte local upper = string.upper local skipglobal = table.tohash { - "_G", "context", "modules", "global", "arg", "utf", 1, + "_G", "_M", "_ENV", "", + "context", "modules", "global", "arg", "utf", 1, "_ptbs_", "_pcol_", "_plib_", "_clib_", "_tlib_", - "_M", "kpse", + "kpse", } local skipkeys = table.tohash { "_pcol_", "_plib_", "_clib_", "_tlib_", "_bpnf_", "_ptbs_", "_cldf_", "_cldn_", "_clmb_", "_clme_", "_clmm_", "_clmn_", "_clma_", "_clmh_", - "_G", "_M", "_VERSION", "_COPYRIGHT", "_DESCRIPTION", "_NAME", "_PACKAGE", "__unload", + "_G", "_M", "_ENV", "", + "_VERSION", "_COPYRIGHT", "_DESCRIPTION", "_NAME", "_PACKAGE", "__unload", + } local sameglobal = { @@ 
-229,7 +232,7 @@ end local NC, NR = context.NC, context.NR local overstrike, rlap, bf = context.overstrike, context.rlap, context.bf -local color, goto = context.color, context.goto +local color, gotolocation = context.color, context["goto"] local function cleanup(s) return "\\char" ..byte(s) .. " " @@ -295,19 +298,19 @@ local function show(title,subtitle,alias,builtin,t,lib,libcolor,glo,glocolor,mar NC() bf() if inlib then if not mark and t == "table" then - goto(function() color( { libcolor }, kstr) end, { k } ) + gotolocation(function() color( { libcolor }, kstr) end, { k } ) else color( { libcolor }, kstr) end elseif inglo then if not mark and t == "table" then - goto(function() color( { glocolor }, kstr) end, { k } ) + gotolocation(function() color( { glocolor }, kstr) end, { k } ) else color( { glocolor }, kstr) end else if not mark and t == "table" then - goto(k, { kstr } ) + gotolocation(k, { kstr } ) else context(kstr) end diff --git a/tex/context/base/s-mod-01.mkiv b/tex/context/base/s-mod-01.mkiv index ae119a7ee..6946bef69 100644 --- a/tex/context/base/s-mod-01.mkiv +++ b/tex/context/base/s-mod-01.mkiv @@ -25,6 +25,9 @@ %D him when you run into problems. Bugs in this style can be sent to %D Hans. +\usemodule + [abr-00] + \setvariables [document] [ file=\jobname, diff --git a/tex/context/base/sort-ini.lua b/tex/context/base/sort-ini.lua index aa3caff96..99eebf0a2 100644 --- a/tex/context/base/sort-ini.lua +++ b/tex/context/base/sort-ini.lua @@ -43,11 +43,8 @@ with any demand so nothing here is frozen.

have language etc properties that then can be used.

]]-- - -local utf = unicode.utf8 local gsub, rep, sub, sort, concat = string.gsub, string.rep, string.sub, table.sort, table.concat -local utfbyte, utfchar = utf.byte, utf.char -local utfcharacters, utfvalues = string.utfcharacters, string.utfvalues +local utfbyte, utfchar, utfcharacters, utfvalues = utf.byte, utf.char, utf.characters, utf.values local next, type, tonumber, rawget, rawset = next, type, tonumber, rawget, rawset local allocate = utilities.storage.allocate @@ -246,7 +243,7 @@ local function preparetables(data) local b = utfbyte(k) n = decomposed[b] or { b } if trace_tests then - report_sorters(" 6 split: %s",utf.string(b)) + report_sorters(" 6 split: %s",utf.tostring(b)) end end if n then diff --git a/tex/context/base/spac-ver.mkiv b/tex/context/base/spac-ver.mkiv index eb05f66dc..0b280c149 100644 --- a/tex/context/base/spac-ver.mkiv +++ b/tex/context/base/spac-ver.mkiv @@ -949,17 +949,19 @@ {\dosingleempty\spac_struts_setup} \def\spac_struts_setup[#1]% - {\ifcsname\??struts#1\endcsname - \csname\??struts#1\endcsname % these are defined later + {\edef\m_strut{#1}% + \ifcsname\??struts\m_strut\endcsname + \csname\??struts\m_strut\endcsname % these are defined later \else - \setcharstrut{#1}% + \setcharstrut\m_strut \fi} -\def\synchronizestrut#1% no [] parsing, faster for internal - {\ifcsname\??struts#1\endcsname - \csname\??struts#1\endcsname +\unexpanded\def\synchronizestrut#1% no [] parsing, faster for internal + {\edef\m_strut{#1}% + \ifcsname\??struts\m_strut\endcsname + \csname\??struts\m_strut\endcsname \else - \setcharstrut{#1}% + \setcharstrut\m_strut \fi} \unexpanded\def\showstruts diff --git a/tex/context/base/status-files.pdf b/tex/context/base/status-files.pdf index 2819a3f4b..53465a6bc 100644 Binary files a/tex/context/base/status-files.pdf and b/tex/context/base/status-files.pdf differ diff --git a/tex/context/base/status-lua.pdf b/tex/context/base/status-lua.pdf index 2371c0934..55b0cb780 100644 Binary files 
a/tex/context/base/status-lua.pdf and b/tex/context/base/status-lua.pdf differ diff --git a/tex/context/base/status-mkiv.tex b/tex/context/base/status-mkiv.tex index 7ca394a19..8685c97ad 100644 --- a/tex/context/base/status-mkiv.tex +++ b/tex/context/base/status-mkiv.tex @@ -86,6 +86,15 @@ \stoptitle +\starttitle[title=To keep an eye on] + +\startitemize[packed] + \startitem Currently lpeg replacements interpret the percent sign so we need to escape it. \stopitem + \startitem Currently numbers and strings are cast in comparisons but that might change in the future. \stopitem +\stopitemize + +\stoptitle + \definehighlight[notabenered] [color=darkred, style=bold] \definehighlight[notabeneblue] [color=darkblue, style=bold] \definehighlight[notabeneyellow] [color=darkyellow, style=bold] diff --git a/tex/context/base/strc-bkm.lua b/tex/context/base/strc-bkm.lua index 4eb9551a9..dbb7d3853 100644 --- a/tex/context/base/strc-bkm.lua +++ b/tex/context/base/strc-bkm.lua @@ -14,7 +14,7 @@ if not modules then modules = { } end modules ['strc-bkm'] = { -- we should hook the placement into everystoptext ... 
needs checking local format, concat, gsub = string.format, table.concat, string.gsub -local utfvalues = string.utfvalues +local utfvalues = utf.values local settings_to_hash = utilities.parsers.settings_to_hash local codeinjections = backends.codeinjections diff --git a/tex/context/base/strc-doc.lua b/tex/context/base/strc-doc.lua index 7275534b7..d6b4cc96b 100644 --- a/tex/context/base/strc-doc.lua +++ b/tex/context/base/strc-doc.lua @@ -15,7 +15,7 @@ if not modules then modules = { } end modules ['strc-doc'] = { -- in section titles by default a zero aborts, so there we need: sectionset=bagger with \definestructureprefixset [bagger] [section-2,section-4] [] -- in lists however zero's are ignored, so there numbersegments=2:4 gives result -local next, type, tonumber = next, type, tonumber +local next, type, tonumber, select = next, type, tonumber, select local format, gsub, find, gmatch, match = string.format, string.gsub, string.find, string.gmatch, string.match local concat, fastcopy = table.concat, table.fastcopy local max, min = math.max, math.min @@ -596,9 +596,8 @@ function sections.typesetnumber(entry,kind,...) -- kind='section','number','pref local set = "" local segments = "" local criterium = "" - local dataset = { ... } - for d=1,#dataset do - local data = dataset[d] -- can be multiple parametersets + for d=1,select("#",...) do + local data = select(d,...) -- can be multiple parametersets if data then if separatorset == "" then separatorset = data.separatorset or "" end if conversionset == "" then conversionset = data.conversionset or "" end diff --git a/tex/context/base/strc-ini.lua b/tex/context/base/strc-ini.lua index 3f0a8768f..790899a21 100644 --- a/tex/context/base/strc-ini.lua +++ b/tex/context/base/strc-ini.lua @@ -20,10 +20,10 @@ but it does not make sense to store all processdata. 
]]-- -local format, concat = string.format, table.concat +local format = string.format local lpegmatch = lpeg.match local count = tex.count -local type, next, tonumber = type, next, tonumber +local type, next, tonumber, select = type, next, tonumber, select local settings_to_array, settings_to_hash = utilities.parsers.settings_to_array, utilities.parsers.settings_to_hash local allocate = utilities.storage.allocate @@ -169,9 +169,9 @@ end helpers.simplify = simplify function helpers.merged(...) - local h, t = { ... }, { } - for k=1, #h do - local v = h[k] + local t = { } + for k=1, select("#",...) do + local v = select(k,...) if v and v ~= "" and not t[k] then t[k] = v end diff --git a/tex/context/base/strc-ref.lua b/tex/context/base/strc-ref.lua index 06a70cc28..0fd8fcd93 100644 --- a/tex/context/base/strc-ref.lua +++ b/tex/context/base/strc-ref.lua @@ -1780,7 +1780,7 @@ end function filters.generic.number(data,what,prefixspec) -- todo: spec and then no stopper if data then - local numberdata = data.numberdata + numberdata = lists.reordered(data) -- data.numberdata if numberdata then helpers.prefix(data,prefixspec) sections.typesetnumber(numberdata,"number",numberdata) diff --git a/tex/context/base/strc-ref.mkvi b/tex/context/base/strc-ref.mkvi index 2ace41c90..edbeded68 100644 --- a/tex/context/base/strc-ref.mkvi +++ b/tex/context/base/strc-ref.mkvi @@ -562,7 +562,7 @@ \def\expandreferencearguments#tag#content{\ctxcommand{setreferencearguments(#tag,\!!bs#content\!!es)}} \def\doifreferencefoundelse#labels#yes#nop% - {\ctxcommand{doifelsereference("\referenceprefix","#labels",\luaconditional\highlighthyperlinks,\luaconditional\gotonewwindow)}% + {\ctxcommand{doifelsereference("\referenceprefix",\!!bs#labels\!!es,\luaconditional\highlighthyperlinks,\luaconditional\gotonewwindow)}% {\expandtexincurrentreference #yes}% {#nop}} @@ -1544,7 +1544,7 @@ {\dotripleargument\strc_references_define_program} \def\strc_references_define_program[#name][#program][#description]% - 
{\ctxcommand{defineprogram("#name","#program","#description")}} + {\ctxcommand{defineprogram("#name",\!!bs#program\!!es,\!!bs#description\!!es)}} \def\program[#name]% incompatible, more consistent, hardy used anyway {\dontleavehmode diff --git a/tex/context/base/syst-aux.lua b/tex/context/base/syst-aux.lua index cda340cb6..b0fb8483b 100644 --- a/tex/context/base/syst-aux.lua +++ b/tex/context/base/syst-aux.lua @@ -14,7 +14,6 @@ if not modules then modules = { } end modules ['syst-aux'] = { local commands, context = commands, context local settings_to_array = utilities.parsers.settings_to_array -local concat = table.concat local format = string.format local utfsub = utf.sub local P, C, Carg, lpegmatch, utf8char = lpeg.P, lpeg.C, lpeg.Carg, lpeg.match, lpeg.patterns.utf8char diff --git a/tex/context/base/syst-con.lua b/tex/context/base/syst-con.lua index ae9d15a26..3a9b966ca 100644 --- a/tex/context/base/syst-con.lua +++ b/tex/context/base/syst-con.lua @@ -14,7 +14,7 @@ the top of 's char range but outside the unicode range.

--ldx]]-- local tonumber = tonumber -local utfchar = unicode.utf8.char +local utfchar = utf.char local gsub, format = string.gsub, string.format function converters.hexstringtonumber(n) tonumber(n,16) end diff --git a/tex/context/base/syst-lua.mkiv b/tex/context/base/syst-lua.mkiv index bca6ab3a2..88a8c246e 100644 --- a/tex/context/base/syst-lua.mkiv +++ b/tex/context/base/syst-lua.mkiv @@ -19,7 +19,7 @@ \def\expdoif #1#2{\ctxcommand{doif (\!!bs#1\!!es==\!!bs#2\!!es)}} \def\expdoifnot #1#2{\ctxcommand{doifnot (\!!bs#1\!!es==\!!bs#2\!!es)}} -% \testfeatureonce{100000}{\doifelse{hello world}{here i am}{}} % 0.3 +% \testfeatureonce{100000}{\doifelse{hello world}{here i am}{}} % 0.3 % \testfeatureonce{100000}{\expandabledoifelse{hello world}{here i am}{}} % 1.5 \def\expdoifcommonelse#1#2{\ctxcommand{doifcommonelse("#1","#2")}} @@ -34,10 +34,14 @@ \unexpanded\def\writestatus#1#2{\ctxcommand{writestatus(\!!bs#1\!!es,\!!bs#2\!!es)}} -% a handy helper (we can probably omit the tex.ctxcatcodes here as nowadays we seldom -% change the regime at the tex end +% A handy helper: -\def\luaexpr#1{\ctxlua{context(tostring(#1))}} +% \def\luaexpr#1{\ctxlua{context(tostring(#1))}} % more efficient: + +% We can omit the tex.ctxcatcodes here as nowadays we seldom +% change the regime at the \TEX\ end: + +\def\luaexpr#1{\directlua{tex.print(tostring(#1))}} % helpers: diff --git a/tex/context/base/toks-ini.lua b/tex/context/base/toks-ini.lua index 1f4d48466..f88e0fcb5 100644 --- a/tex/context/base/toks-ini.lua +++ b/tex/context/base/toks-ini.lua @@ -5,8 +5,7 @@ if not modules then modules = { } end modules ['toks-ini'] = { license = "see context related readme files" } -local utf = unicode.utf8 -local utfbyte, utfchar = utf.byte, utf.char +local utfbyte, utfchar, utfvalues = utf.byte, utf.char, utf.values local format, gsub = string.format, string.gsub --[[ldx-- @@ -56,7 +55,7 @@ tokens.other = function(chr) return createtoken(utfbyte(chr), 12) end tokens.letters = function(str) local t, 
n = { }, 0 - for chr in string.utfvalues(str) do + for chr in utfvalues(str) do n = n + 1 t[n] = createtoken(chr, 11) end diff --git a/tex/context/base/trac-log.lua b/tex/context/base/trac-log.lua index 44de8ebe6..6c8213099 100644 --- a/tex/context/base/trac-log.lua +++ b/tex/context/base/trac-log.lua @@ -14,9 +14,9 @@ if not modules then modules = { } end modules ['trac-log'] = { local write_nl, write = texio and texio.write_nl or print, texio and texio.write or io.write local format, gmatch, find = string.format, string.gmatch, string.find local concat, insert, remove = table.concat, table.insert, table.remove -local escapedpattern = string.escapedpattern +local topattern = string.topattern local texcount = tex and tex.count -local next, type = next, type +local next, type, select = next, type, select local setmetatableindex = table.setmetatableindex @@ -330,7 +330,7 @@ local function setblocked(category,value) if data[c] then v.state = value else - c = escapedpattern(c,true) + c = topattern(c,true,true) for k, v in next, data do if find(k,c) then v.state = value @@ -548,10 +548,10 @@ local function reporthelp(t,...) if type(helpinfo) == "string" then reportlines(t,helpinfo) elseif type(helpinfo) == "table" then - local tags = { ... } - for i=1,#tags do - reportlines(t,t.helpinfo[tags[i]]) - if i < #tags then + local n = select("#",...) 
+ for i=1,n do + reportlines(t,t.helpinfo[select(i,...)]) + if i < n then t.report() end end diff --git a/tex/context/base/trac-set.lua b/tex/context/base/trac-set.lua index 8c842f542..ddbd94825 100644 --- a/tex/context/base/trac-set.lua +++ b/tex/context/base/trac-set.lua @@ -10,7 +10,7 @@ if not modules then modules = { } end modules ['trac-set'] = { -- might become u local type, next, tostring = type, next, tostring local concat = table.concat -local format, find, lower, gsub, escapedpattern = string.format, string.find, string.lower, string.gsub, string.escapedpattern +local format, find, lower, gsub, topattern = string.format, string.find, string.lower, string.gsub, string.topattern local is_boolean = string.is_boolean local settings_to_hash = utilities.parsers.settings_to_hash local allocate = utilities.storage.allocate @@ -98,7 +98,7 @@ local function set(t,what,newvalue) else value = is_boolean(value,value) end - w = "^" .. escapedpattern(w,true) .. "$" -- new: anchored + w = topattern(w,true,true) for name, functions in next, data do if done[name] then -- prevent recursion due to wildcards @@ -152,7 +152,8 @@ function setters.register(t,what,...) end end local default = functions.default -- can be set from cnf file - for _, fnc in next, { ... } do + for i=1,select("#",...) do + local fnc = select(i,...) local typ = type(fnc) if typ == "string" then if trace_initialize then @@ -221,7 +222,7 @@ function setters.show(t) local value, default, modules = functions.value, functions.default, #functions value = value == nil and "unset" or tostring(value) default = default == nil and "unset" or tostring(default) - t.report("%-50s modules: %2i default: %6s value: %6s",name,modules,default,value) + t.report("%-50s modules: %2i default: %-12s value: %-12s",name,modules,default,value) end end t.report() @@ -245,17 +246,29 @@ local function report(setter,...) 
end end -function setters.new(name) +local function default(setter,name) + local d = setter.data[name] + return d and d.default +end + +local function value(setter,name) + local d = setter.data[name] + return d and (d.value or d.default) +end + +function setters.new(name) -- we could use foo:bar syntax (but not used that often) local setter -- we need to access it in setter itself setter = { data = allocate(), -- indexed, but also default and value fields name = name, - report = function(...) report (setter,...) end, - enable = function(...) enable (setter,...) end, - disable = function(...) disable (setter,...) end, - register = function(...) register(setter,...) end, - list = function(...) list (setter,...) end, - show = function(...) show (setter,...) end, + report = function(...) report (setter,...) end, + enable = function(...) enable (setter,...) end, + disable = function(...) disable (setter,...) end, + register = function(...) register(setter,...) end, + list = function(...) list (setter,...) end, + show = function(...) show (setter,...) end, + default = function(...) return default (setter,...) end, + value = function(...) return value (setter,...) end, } data[name] = setter return setter diff --git a/tex/context/base/trac-tim.lua b/tex/context/base/trac-tim.lua index 66ac518bb..e76882afc 100644 --- a/tex/context/base/trac-tim.lua +++ b/tex/context/base/trac-tim.lua @@ -14,6 +14,8 @@ moduledata = moduledata or { } local progress = moduledata.progress or { } moduledata.progress = progress +local report_timing = logs.reporter("timing") + progress.parameters = nodes.snapshots.getparameters progress.defaultfilename = ((tex and tex.jobname) or "whatever") .. "-luatex-progress" @@ -24,7 +26,9 @@ function progress.store() end function progress.save(name) - table.save((name or progress.defaultfilename) .. ".lut",nodes.snapshots.getsamples()) + local filename = (name or progress.defaultfilename) .. 
".lut" + report_timing("saving data in %q",filename) + table.save(filename,nodes.snapshots.getsamples()) nodes.snapshots.resetsamples() end diff --git a/tex/context/base/typo-dir.lua b/tex/context/base/typo-dir.lua index 13c0aec97..ee9407074 100644 --- a/tex/context/base/typo-dir.lua +++ b/tex/context/base/typo-dir.lua @@ -6,8 +6,6 @@ if not modules then modules = { } end modules ['typo-dir'] = { license = "see context related readme files" } -local utf = unicode.utf8 - local next, type = next, type local format, insert, sub, find, match = string.format, table.insert, string.sub, string.find, string.match local utfchar = utf.char diff --git a/tex/context/base/typo-krn.lua b/tex/context/base/typo-krn.lua index e9b9f1f45..cd2639c67 100644 --- a/tex/context/base/typo-krn.lua +++ b/tex/context/base/typo-krn.lua @@ -6,8 +6,6 @@ if not modules then modules = { } end modules ['typo-krn'] = { license = "see context related readme files" } -local utf = unicode.utf8 - local next, type, tonumber = next, type, tonumber local utfchar = utf.char diff --git a/tex/context/base/typo-spa.lua b/tex/context/base/typo-spa.lua index 9e653ad55..d40aed579 100644 --- a/tex/context/base/typo-spa.lua +++ b/tex/context/base/typo-spa.lua @@ -6,8 +6,6 @@ if not modules then modules = { } end modules ['typo-spa'] = { license = "see context related readme files" } -local utf = unicode.utf8 - local next, type = next, type local utfchar = utf.char diff --git a/tex/context/base/util-lua.lua b/tex/context/base/util-lua.lua index df69bba0e..b496880b2 100644 --- a/tex/context/base/util-lua.lua +++ b/tex/context/base/util-lua.lua @@ -8,7 +8,7 @@ if not modules then modules = { } end modules ['util-lua'] = { } local rep, sub, byte, dump, format = string.rep, string.sub, string.byte, string.dump, string.format -local loadstring, loadfile, type = loadstring, loadfile, type +local load, loadfile, type = load, loadfile, type utilities = utilities or {} utilities.lua = utilities.lua or { } @@ -25,11 +25,23 @@ 
luautilities.nofstrippedbytes = 0 local strippedchunks = { } -- allocate() luautilities.strippedchunks = strippedchunks +luautilities.suffixes = { + tma = "tma", + tmc = jit and "tmb" or "tmc", + lua = "lua", + luc = jit and "lub" or "luc", + lui = "lui", + luv = "luv", + luj = "luj", + tua = "tua", + tuc = "tuc", +} + local function fatalerror(name) utilities.report(format("fatal error in %q",name or "unknown")) end -if jit then +if jit or status.luatex_version >= 74 then local function register(name) if tracestripping then @@ -42,7 +54,7 @@ if jit then local function stupidcompile(luafile,lucfile,strip) local code = io.loaddata(luafile) if code and code ~= "" then - code = loadstring(code) + code = load(code) if code then code = dump(code,strip and luautilities.stripcode or luautilities.alwaysstripcode) if code and code ~= "" then @@ -74,13 +86,13 @@ if jit then end if forcestrip or luautilities.alwaysstripcode then register(name) - return loadstring(dump(code,true)), 0 + return load(dump(code,true)), 0 else return code, 0 end elseif luautilities.alwaysstripcode then register(name) - return loadstring(dump(code,true)), 0 + return load(dump(code,true)), 0 else return code, 0 end @@ -88,14 +100,14 @@ if jit then function luautilities.strippedloadstring(code,forcestrip,name) -- not executed if forcestrip and luautilities.stripcode or luautilities.alwaysstripcode then - code = loadstring(code) + code = load(code) if not code then fatalerror(name) end register(name) code = dump(code,true) end - return loadstring(code), 0 + return load(code), 0 end function luautilities.compile(luafile,lucfile,cleanup,strip,fallback) -- defaults: cleanup=false strip=true @@ -136,67 +148,79 @@ else return delta end - local function strip_code_pc(dump,name) - local before = #dump - local version, format, endian, int, size, ins, num = byte(dump,5,11) - local subint - if endian == 1 then - subint = function(dump, i, l) - local val = 0 - for n = l, 1, -1 do - val = val * 256 + byte(dump,i + 
n - 1) + local strip_code_pc + + if _MAJORVERSION == 5 and _MINORVERSION == 1 then + + strip_code_pc = function(dump,name) + local before = #dump + local version, format, endian, int, size, ins, num = byte(dump,5,11) + local subint + if endian == 1 then + subint = function(dump, i, l) + local val = 0 + for n = l, 1, -1 do + val = val * 256 + byte(dump,i + n - 1) + end + return val, i + l end - return val, i + l - end - else - subint = function(dump, i, l) - local val = 0 - for n = 1, l, 1 do - val = val * 256 + byte(dump,i + n - 1) + else + subint = function(dump, i, l) + local val = 0 + for n = 1, l, 1 do + val = val * 256 + byte(dump,i + n - 1) + end + return val, i + l end - return val, i + l end - end - local strip_function - strip_function = function(dump) - local count, offset = subint(dump, 1, size) - local stripped, dirty = rep("\0", size), offset + count - offset = offset + count + int * 2 + 4 - offset = offset + int + subint(dump, offset, int) * ins - count, offset = subint(dump, offset, int) - for n = 1, count do - local t - t, offset = subint(dump, offset, 1) - if t == 1 then - offset = offset + 1 - elseif t == 4 then - offset = offset + size + subint(dump, offset, size) - elseif t == 3 then - offset = offset + num + local strip_function + strip_function = function(dump) + local count, offset = subint(dump, 1, size) + local stripped, dirty = rep("\0", size), offset + count + offset = offset + count + int * 2 + 4 + offset = offset + int + subint(dump, offset, int) * ins + count, offset = subint(dump, offset, int) + for n = 1, count do + local t + t, offset = subint(dump, offset, 1) + if t == 1 then + offset = offset + 1 + elseif t == 4 then + offset = offset + size + subint(dump, offset, size) + elseif t == 3 then + offset = offset + num + end end + count, offset = subint(dump, offset, int) + stripped = stripped .. sub(dump,dirty, offset - 1) + for n = 1, count do + local proto, off = strip_function(sub(dump,offset, -1)) + stripped, offset = stripped .. 
proto, offset + off - 1 + end + offset = offset + subint(dump, offset, int) * int + int + count, offset = subint(dump, offset, int) + for n = 1, count do + offset = offset + subint(dump, offset, size) + size + int * 2 + end + count, offset = subint(dump, offset, int) + for n = 1, count do + offset = offset + subint(dump, offset, size) + size + end + stripped = stripped .. rep("\0", int * 3) + return stripped, offset end - count, offset = subint(dump, offset, int) - stripped = stripped .. sub(dump,dirty, offset - 1) - for n = 1, count do - local proto, off = strip_function(sub(dump,offset, -1)) - stripped, offset = stripped .. proto, offset + off - 1 - end - offset = offset + subint(dump, offset, int) * int + int - count, offset = subint(dump, offset, int) - for n = 1, count do - offset = offset + subint(dump, offset, size) + size + int * 2 - end - count, offset = subint(dump, offset, int) - for n = 1, count do - offset = offset + subint(dump, offset, size) + size - end - stripped = stripped .. rep("\0", int * 3) - return stripped, offset + dump = sub(dump,1,12) .. strip_function(sub(dump,13,-1)) + local after = #dump + local delta = register(name,before,after) + return dump, delta end - dump = sub(dump,1,12) .. strip_function(sub(dump,13,-1)) - local after = #dump - local delta = register(name,before,after) - return dump, delta + + else + + strip_code_pc = function(dump,name) + return dump, 0 + end + end -- ... end of borrowed code. 
@@ -216,14 +240,14 @@ else end if forcestrip then local code, n = strip_code_pc(dump(code),name) - return loadstring(code), n + return load(code), n elseif luautilities.alwaysstripcode then - return loadstring(strip_code_pc(dump(code),name)) + return load(strip_code_pc(dump(code),name)) else return code, 0 end elseif luautilities.alwaysstripcode then - return loadstring(strip_code_pc(dump(code),name)) + return load(strip_code_pc(dump(code),name)) else return code, 0 end @@ -232,20 +256,20 @@ else function luautilities.strippedloadstring(code,forcestrip,name) -- not executed local n = 0 if (forcestrip and luautilities.stripcode) or luautilities.alwaysstripcode then - code = loadstring(code) + code = load(code) if not code then fatalerror(name) end code, n = strip_code_pc(dump(code),name) end - return loadstring(code), n + return load(code), n end local function stupidcompile(luafile,lucfile,strip) local code = io.loaddata(luafile) local n = 0 if code and code ~= "" then - code = loadstring(code) + code = load(code) if not code then fatalerror() end @@ -285,6 +309,7 @@ else utilities.report("lua: %s dumped into %s (unstripped)",luafile,lucfile) end cleanup = false -- better see how bad it is + done = true -- hm end if done and cleanup == true and lfs.isfile(lucfile) and lfs.isfile(luafile) then utilities.report("lua: removing %s",luafile) diff --git a/tex/context/base/util-pck.lua b/tex/context/base/util-pck.lua index d964c7a23..7937ff557 100644 --- a/tex/context/base/util-pck.lua +++ b/tex/context/base/util-pck.lua @@ -10,6 +10,7 @@ if not modules then modules = { } end modules ['util-pck'] = { local next, tostring, type = next, tostring, type local sort, concat = table.sort, table.concat +local sortedhashkeys, sortedkeys = table.sortedhashkeys, table.sortedkeys utilities = utilities or { } utilities.packers = utilities.packers or { } @@ -43,27 +44,19 @@ end packers.hashed = hashed packers.simplehashed = simplehashed ---~ local function pack(t,keys,hash,index) ---~ 
for k,v in next, t do ---~ if type(v) == "table" then ---~ pack(v,keys,hash,index) ---~ end ---~ if keys[k] and type(v) == "table" then ---~ local h = hashed(v) ---~ local i = hash[h] ---~ if not i then ---~ i = #index + 1 ---~ index[i] = v ---~ hash[h] = i ---~ end ---~ t[k] = i ---~ end ---~ end ---~ end +-- In luatex < 0.74 (lua 5.1) a next chain was the same for each run so no sort was needed, +-- but in the latest greatest versions (lua 5.2) we really need to sort the keys in order +-- not to get endless runs due to a difference in tuc files. local function pack(t,keys,hash,index) if t then - for k,v in next, t do + -- for k, v in next, t do + -- local sk = sortedkeys(t) + local sk = sortedhashkeys(t) + for i=1,#sk do + local k = sk[i] + local v = t[k] + -- if type(v) == "table" then pack(v,keys,hash,index) if keys[k] then diff --git a/tex/context/base/util-prs.lua b/tex/context/base/util-prs.lua index 506497875..6f16af666 100644 --- a/tex/context/base/util-prs.lua +++ b/tex/context/base/util-prs.lua @@ -7,7 +7,6 @@ if not modules then modules = { } end modules ['util-prs'] = { } local lpeg, table, string = lpeg, table, string - local P, R, V, S, C, Ct, Cs, Carg, Cc, Cg, Cf, Cp = lpeg.P, lpeg.R, lpeg.V, lpeg.S, lpeg.C, lpeg.Ct, lpeg.Cs, lpeg.Carg, lpeg.Cc, lpeg.Cg, lpeg.Cf, lpeg.Cp local lpegmatch, patterns = lpeg.match, lpeg.patterns local concat, format, gmatch, find = table.concat, string.format, string.gmatch, string.find @@ -425,3 +424,42 @@ function parsers.rfc4180splitter(specification) end end +-- utilities.parsers.stepper("1,7-",9,function(i) print(">>>",i) end) +-- utilities.parsers.stepper("1-3,7,8,9") +-- utilities.parsers.stepper("1-3,6,7",function(i) print(">>>",i) end) +-- utilities.parsers.stepper(" 1 : 3, ,7 ") +-- utilities.parsers.stepper("1:4,9:13,24:*",30) + +local function ranger(first,last,n,action) + if not first then + -- forget about it + elseif last == true then + for i=first,n or first do + action(i) + end + elseif last then + for 
i=first,last do + action(i) + end + else + action(first) + end +end + +local cardinal = patterns.cardinal / tonumber +local spacers = patterns.spacer^0 +local endofstring = patterns.endofstring + +local stepper = spacers * ( C(cardinal) * ( spacers * S(":-") * spacers * ( C(cardinal) + Cc(true) ) + Cc(false) ) + * Carg(1) * Carg(2) / ranger * S(", ")^0 )^1 + +local stepper = spacers * ( C(cardinal) * ( spacers * S(":-") * spacers * ( C(cardinal) + (P("*") + endofstring) * Cc(true) ) + Cc(false) ) + * Carg(1) * Carg(2) / ranger * S(", ")^0 )^1 * endofstring -- we're sort of strict (could do without endofstring) + +function utilities.parsers.stepper(str,n,action) + if type(n) == "function" then + lpegmatch(stepper,str,1,false,n or print) + else + lpegmatch(stepper,str,1,n,action or print) + end +end diff --git a/tex/context/base/util-seq.lua b/tex/context/base/util-seq.lua index c3361b7be..711424a2c 100644 --- a/tex/context/base/util-seq.lua +++ b/tex/context/base/util-seq.lua @@ -18,7 +18,7 @@ use locals to refer to them when compiling the chain.

-- todo: protect groups (as in tasks) local format, gsub, concat, gmatch = string.format, string.gsub, table.concat, string.gmatch -local type, loadstring = type, loadstring +local type, load = type, load utilities = utilities or { } local tables = utilities.tables @@ -257,7 +257,7 @@ compile = function(t,compiler,n) -- already referred to in sequencers.new if compiled == "" then runner = false else - runner = compiled and loadstring(compiled)() + runner = compiled and load(compiled)() end t.runner = runner return runner diff --git a/tex/context/base/util-sql-users.lua b/tex/context/base/util-sql-users.lua index 4bf5057b5..5b6d0906a 100644 --- a/tex/context/base/util-sql-users.lua +++ b/tex/context/base/util-sql-users.lua @@ -13,7 +13,7 @@ if not modules then modules = { } end modules ['util-sql-users'] = { local sql = require("util-sql") local md5 = require("md5") -local format, upper, find, gsub, escapedpattern = string.format, string.upper, string.find, string.gsub, string.escapedpattern +local format, upper, find, gsub, topattern = string.format, string.upper, string.find, string.gsub, string.topattern local sumhexa = md5.sumhexa local booleanstring = string.booleanstring @@ -52,7 +52,7 @@ end local function validaddress(address,addresses) if address and addresses and address ~= "" and addresses ~= "" then - if find(address,"^" .. 
escapedpattern(addresses,true)) then -- simple escapes + if find(address,topattern(addresses,true,true)) then return true, "valid remote address" end return false, "invalid remote address" diff --git a/tex/context/base/util-sql.lua b/tex/context/base/util-sql.lua index fc460b984..5310ea699 100644 --- a/tex/context/base/util-sql.lua +++ b/tex/context/base/util-sql.lua @@ -60,7 +60,7 @@ if not modules then modules = { } end modules ['util-sql'] = { local format, match = string.format, string.match local random = math.random -local rawset, setmetatable, getmetatable, loadstring, type = rawset, setmetatable, getmetatable, loadstring, type +local rawset, setmetatable, getmetatable, load, type = rawset, setmetatable, getmetatable, load, type local P, S, V, C, Cs, Ct, Cc, Cg, Cf, patterns, lpegmatch = lpeg.P, lpeg.S, lpeg.V, lpeg.C, lpeg.Cs, lpeg.Ct, lpeg.Cc, lpeg.Cg, lpeg.Cf, lpeg.patterns, lpeg.match local concat = table.concat @@ -149,7 +149,7 @@ local function makeconverter(entries,celltemplate,wraptemplate) end end local code = format(wraptemplate,concat(shortcuts,"\n"),concat(assignments,"\n ")) - local func = loadstring(code) + local func = load(code) return func and func() end diff --git a/tex/context/base/util-sta.lua b/tex/context/base/util-sta.lua index f94c1acdf..1a61ec4e6 100644 --- a/tex/context/base/util-sta.lua +++ b/tex/context/base/util-sta.lua @@ -8,6 +8,7 @@ if not modules then modules = { } end modules ['util-sta'] = { local insert, remove, fastcopy, concat = table.insert, table.remove, table.fastcopy, table.concat local format = string.format +local select, tostring = select, tostring local trace_stacker = false trackers.register("stacker.resolve", function(v) trace_stacker = v end) @@ -90,9 +91,8 @@ function stacker.new(name) local hashing = true local function push(...) - local t = { ... } - for i=1,#t do - insert(stack,t[i]) + for i=1,select("#",...) 
do + insert(stack,(select(i,...))) -- watch the () end if hashing then local c = concat(stack,"|") diff --git a/tex/context/base/util-sto.lua b/tex/context/base/util-sto.lua index 42ee6cf00..147f17ea9 100644 --- a/tex/context/base/util-sto.lua +++ b/tex/context/base/util-sto.lua @@ -12,9 +12,11 @@ utilities = utilities or { } utilities.storage = utilities.storage or { } local storage = utilities.storage +local report = texio and texio.write_nl or print + function storage.mark(t) if not t then - texio.write_nl("fatal error: storage cannot be marked") + report("fatal error: storage cannot be marked") return -- os.exit() end local m = getmetatable(t) @@ -44,36 +46,36 @@ end function storage.checked(t) if not t then - texio.write_nl("fatal error: storage has not been allocated") + report("fatal error: storage has not been allocated") return -- os.exit() end return t end ---~ function utilities.storage.delay(parent,name,filename) ---~ local m = getmetatable(parent) ---~ m.__list[name] = filename ---~ end ---~ ---~ function utilities.storage.predefine(parent) ---~ local list = { } ---~ local m = getmetatable(parent) or { ---~ __list = list, ---~ __index = function(t,k) ---~ local l = require(list[k]) ---~ t[k] = l ---~ return l ---~ end ---~ } ---~ setmetatable(parent,m) ---~ end ---~ ---~ bla = { } ---~ utilities.storage.predefine(bla) ---~ utilities.storage.delay(bla,"test","oepsoeps") ---~ local t = bla.test ---~ table.print(t) ---~ print(t.a) +-- function utilities.storage.delay(parent,name,filename) +-- local m = getmetatable(parent) +-- m.__list[name] = filename +-- end +-- +-- function utilities.storage.predefine(parent) +-- local list = { } +-- local m = getmetatable(parent) or { +-- __list = list, +-- __index = function(t,k) +-- local l = require(list[k]) +-- t[k] = l +-- return l +-- end +-- } +-- setmetatable(parent,m) +-- end +-- +-- bla = { } +-- utilities.storage.predefine(bla) +-- utilities.storage.delay(bla,"test","oepsoeps") +-- local t = bla.test +-- 
table.print(t) +-- print(t.a) function storage.setinitializer(data,initialize) local m = getmetatable(data) or { } @@ -98,12 +100,14 @@ end -- table namespace ? -local function f_empty () return "" end -- t,k -local function f_self (t,k) t[k] = k return k end -local function f_ignore() end -- t,k,v +local function f_empty () return "" end -- t,k +local function f_self (t,k) t[k] = k return k end +local function f_table (t,k) local v = { } t[k] = v return v end +local function f_ignore() end -- t,k,v local t_empty = { __index = f_empty } local t_self = { __index = f_self } +local t_table = { __index = f_table } local t_ignore = { __newindex = f_ignore } function table.setmetatableindex(t,f) @@ -113,6 +117,8 @@ function table.setmetatableindex(t,f) m.__index = f_empty elseif f == "key" then m.__index = f_self + elseif f == "table" then + m.__index = f_table else m.__index = f end @@ -121,6 +127,8 @@ function table.setmetatableindex(t,f) setmetatable(t, t_empty) elseif f == "key" then setmetatable(t, t_self) + elseif f == "table" then + setmetatable(t, t_table) else setmetatable(t,{ __index = f }) end diff --git a/tex/context/base/util-tab.lua b/tex/context/base/util-tab.lua index c547d5b3e..e3d6a9f7d 100644 --- a/tex/context/base/util-tab.lua +++ b/tex/context/base/util-tab.lua @@ -13,24 +13,10 @@ local tables = utilities.tables local format, gmatch, rep, gsub = string.format, string.gmatch, string.rep, string.gsub local concat, insert, remove = table.concat, table.insert, table.remove local setmetatable, getmetatable, tonumber, tostring = setmetatable, getmetatable, tonumber, tostring -local type, next, rawset, tonumber, loadstring = type, next, rawset, tonumber, loadstring +local type, next, rawset, tonumber, load, select = type, next, rawset, tonumber, load, select local lpegmatch, P, Cs = lpeg.match, lpeg.P, lpeg.Cs local serialize = table.serialize --- function tables.definetable(target) -- defines undefined tables --- local composed, t, n = nil, { }, 0 --- for 
name in gmatch(target,"([^%.]+)") do --- n = n + 1 --- if composed then --- composed = composed .. "." .. name --- else --- composed = name --- end --- t[n] = format("%s = %s or { }",composed,composed) --- end --- return concat(t,"\n") --- end - local splitter = lpeg.tsplitat(".") function tables.definetable(target,nofirst,nolast) -- defines undefined tables @@ -59,13 +45,13 @@ end -- local t = tables.definedtable("a","b","c","d") function tables.definedtable(...) - local l = { ... } local t = _G - for i=1,#l do - local tl = t[l[i]] + for i=1,select("#",...) do + local li = select(i,...) + local tl = t[li] if not tl then tl = { } - t[l[i]] = tl + t[li] = tl end t = tl end @@ -258,7 +244,7 @@ function table.deserialize(str) if not str or str == "" then return end - local code = loadstring(str) + local code = load(str) if not code then return end @@ -275,7 +261,7 @@ function table.load(filename) if filename then local t = io.loaddata(filename) if t and t ~= "" then - t = loadstring(t) + t = load(t) if type(t) == "function" then t = t() if type(t) == "table" then diff --git a/tex/context/base/x-mathml.lua b/tex/context/base/x-mathml.lua index 430122786..f35251d37 100644 --- a/tex/context/base/x-mathml.lua +++ b/tex/context/base/x-mathml.lua @@ -9,13 +9,11 @@ if not modules then modules = { } end modules ['x-mathml'] = { -- This needs an upgrade to the latest greatest mechanisms. local type, next = type, next -local utf = unicode.utf8 local format, lower, find, gsub = string.format, string.lower, string.find, string.gsub local strip = string.strip -local utfchar = utf.char local xmlsprint, xmlcprint, xmltext, xmlcontent = xml.sprint, xml.cprint, xml.text, xml.content local getid = lxml.getid -local utfcharacters, utfvalues = string.utfcharacters, string.utfvalues +local utfchar, 
utfcharacters, utfvalues = utf.char, utf.characters, utf.values local lpegmatch = lpeg.match local mathml = { } diff --git a/tex/context/interface/keys-cs.xml b/tex/context/interface/keys-cs.xml index 0fe7f4b4d..05c9fdf1d 100644 --- a/tex/context/interface/keys-cs.xml +++ b/tex/context/interface/keys-cs.xml @@ -1711,6 +1711,7 @@ + diff --git a/tex/context/interface/keys-de.xml b/tex/context/interface/keys-de.xml index c14f7c526..31a48c2a2 100644 --- a/tex/context/interface/keys-de.xml +++ b/tex/context/interface/keys-de.xml @@ -1711,6 +1711,7 @@ + diff --git a/tex/context/interface/keys-en.xml b/tex/context/interface/keys-en.xml index d981143b0..57475e65b 100644 --- a/tex/context/interface/keys-en.xml +++ b/tex/context/interface/keys-en.xml @@ -1711,6 +1711,7 @@ + diff --git a/tex/context/interface/keys-fr.xml b/tex/context/interface/keys-fr.xml index 3d25240bf..7fb90a042 100644 --- a/tex/context/interface/keys-fr.xml +++ b/tex/context/interface/keys-fr.xml @@ -1711,6 +1711,7 @@ + diff --git a/tex/context/interface/keys-it.xml b/tex/context/interface/keys-it.xml index cc08f1e3e..336930e16 100644 --- a/tex/context/interface/keys-it.xml +++ b/tex/context/interface/keys-it.xml @@ -1711,6 +1711,7 @@ + diff --git a/tex/context/interface/keys-nl.xml b/tex/context/interface/keys-nl.xml index 039d6e882..4e0797424 100644 --- a/tex/context/interface/keys-nl.xml +++ b/tex/context/interface/keys-nl.xml @@ -1711,6 +1711,7 @@ + diff --git a/tex/context/interface/keys-pe.xml b/tex/context/interface/keys-pe.xml index 6615a3488..014a138ac 100644 --- a/tex/context/interface/keys-pe.xml +++ b/tex/context/interface/keys-pe.xml @@ -1711,6 +1711,7 @@ + diff --git a/tex/context/interface/keys-ro.xml b/tex/context/interface/keys-ro.xml index fb827945e..77209d4bb 100644 --- a/tex/context/interface/keys-ro.xml +++ b/tex/context/interface/keys-ro.xml @@ -1711,6 +1711,7 @@ + diff --git a/tex/generic/context/luatex/luatex-fonts-merged.lua b/tex/generic/context/luatex/luatex-fonts-merged.lua 
index 1f402cd4f..2b1f94b14 100644 --- a/tex/generic/context/luatex/luatex-fonts-merged.lua +++ b/tex/generic/context/luatex/luatex-fonts-merged.lua @@ -1,10 +1,10 @@ -- merged file : luatex-fonts-merged.lua -- parent file : luatex-fonts.lua --- merge date : 12/10/12 23:20:34 +-- merge date : 12/24/12 20:08:31 do -- begin closure to overcome local limits and interference -if not modules then modules = { } end modules ['l-functions'] = { +if not modules then modules = { } end modules ['l-lpeg'] = { version = 1.001, comment = "companion to luat-lib.mkiv", author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", @@ -12,1167 +12,1004 @@ if not modules then modules = { } end modules ['l-functions'] = { license = "see context related readme files" } -functions = functions or { } - -function functions.dummy() end +-- a new lpeg fails on a #(1-P(":")) test and really needs a + P(-1) -end -- closure +-- move utf -> l-unicode +-- move string -> l-string or keep it here -do -- begin closure to overcome local limits and interference +local lpeg = require("lpeg") -if not modules then modules = { } end modules ['l-string'] = { - version = 1.001, - comment = "companion to luat-lib.mkiv", - author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", - copyright = "PRAGMA ADE / ConTeXt Development Team", - license = "see context related readme files" -} +-- tracing (only used when we encounter a problem in integration of lpeg in luatex) -local string = string -local sub, gsub, find, match, gmatch, format, char, byte, rep, lower = string.sub, string.gsub, string.find, string.match, string.gmatch, string.format, string.char, string.byte, string.rep, string.lower -local lpegmatch, S, C, Ct = lpeg.match, lpeg.S, lpeg.C, lpeg.Ct +-- some code will move to unicode and string --- some functions may disappear as they are not used anywhere +local report = texio and texio.write_nl or print -if not string.split then +-- local lpmatch = lpeg.match +-- local lpprint = lpeg.print +-- local lpp = lpeg.P +-- local lpr = 
lpeg.R +-- local lps = lpeg.S +-- local lpc = lpeg.C +-- local lpb = lpeg.B +-- local lpv = lpeg.V +-- local lpcf = lpeg.Cf +-- local lpcb = lpeg.Cb +-- local lpcg = lpeg.Cg +-- local lpct = lpeg.Ct +-- local lpcs = lpeg.Cs +-- local lpcc = lpeg.Cc +-- local lpcmt = lpeg.Cmt +-- local lpcarg = lpeg.Carg - -- this will be overloaded by a faster lpeg variant +-- function lpeg.match(l,...) report("LPEG MATCH") lpprint(l) return lpmatch(l,...) end - function string.split(str,pattern) - local t = { } - if #str > 0 then - local n = 1 - for s in gmatch(str..pattern,"(.-)"..pattern) do - t[n] = s - n = n + 1 - end - end - return t - end +-- function lpeg.P (l) local p = lpp (l) report("LPEG P =") lpprint(l) return p end +-- function lpeg.R (l) local p = lpr (l) report("LPEG R =") lpprint(l) return p end +-- function lpeg.S (l) local p = lps (l) report("LPEG S =") lpprint(l) return p end +-- function lpeg.C (l) local p = lpc (l) report("LPEG C =") lpprint(l) return p end +-- function lpeg.B (l) local p = lpb (l) report("LPEG B =") lpprint(l) return p end +-- function lpeg.V (l) local p = lpv (l) report("LPEG V =") lpprint(l) return p end +-- function lpeg.Cf (l) local p = lpcf (l) report("LPEG Cf =") lpprint(l) return p end +-- function lpeg.Cb (l) local p = lpcb (l) report("LPEG Cb =") lpprint(l) return p end +-- function lpeg.Cg (l) local p = lpcg (l) report("LPEG Cg =") lpprint(l) return p end +-- function lpeg.Ct (l) local p = lpct (l) report("LPEG Ct =") lpprint(l) return p end +-- function lpeg.Cs (l) local p = lpcs (l) report("LPEG Cs =") lpprint(l) return p end +-- function lpeg.Cc (l) local p = lpcc (l) report("LPEG Cc =") lpprint(l) return p end +-- function lpeg.Cmt (l) local p = lpcmt (l) report("LPEG Cmt =") lpprint(l) return p end +-- function lpeg.Carg (l) local p = lpcarg(l) report("LPEG Carg =") lpprint(l) return p end -end +local type, next = type, next +local byte, char, gmatch, format = string.byte, string.char, string.gmatch, string.format -function 
string.unquoted(str) - return (gsub(str,"^([\"\'])(.*)%1$","%2")) -end +-- Beware, we predefine a bunch of patterns here and one reason for doing so +-- is that we get consistent behaviour in some of the visualizers. ---~ function stringunquoted(str) ---~ if find(str,"^[\'\"]") then ---~ return sub(str,2,-2) ---~ else ---~ return str ---~ end ---~ end +lpeg.patterns = lpeg.patterns or { } -- so that we can share +local patterns = lpeg.patterns -function string.quoted(str) - return format("%q",str) -- always " -end +local P, R, S, V, Ct, C, Cs, Cc, Cp, Cmt = lpeg.P, lpeg.R, lpeg.S, lpeg.V, lpeg.Ct, lpeg.C, lpeg.Cs, lpeg.Cc, lpeg.Cp, lpeg.Cmt +local lpegtype, lpegmatch = lpeg.type, lpeg.match -function string.count(str,pattern) -- variant 3 - local n = 0 - for _ in gmatch(str,pattern) do -- not for utf - n = n + 1 - end - return n -end +local anything = P(1) +local endofstring = P(-1) +local alwaysmatched = P(true) -function string.limit(str,n,sentinel) -- not utf proof - if #str > n then - sentinel = sentinel or "..." - return sub(str,1,(n-#sentinel)) .. 
sentinel - else - return str - end -end +patterns.anything = anything +patterns.endofstring = endofstring +patterns.beginofstring = alwaysmatched +patterns.alwaysmatched = alwaysmatched -local space = S(" \t\v\n") -local nospace = 1 - space -local stripper = space^0 * C((space^0 * nospace^1)^0) -- roberto's code +local digit, sign = R('09'), S('+-') +local cr, lf, crlf = P("\r"), P("\n"), P("\r\n") +local newline = crlf + S("\r\n") -- cr + lf +local escaped = P("\\") * anything +local squote = P("'") +local dquote = P('"') +local space = P(" ") -function string.strip(str) - return lpegmatch(stripper,str) or "" -end +local utfbom_32_be = P('\000\000\254\255') +local utfbom_32_le = P('\255\254\000\000') +local utfbom_16_be = P('\255\254') +local utfbom_16_le = P('\254\255') +local utfbom_8 = P('\239\187\191') +local utfbom = utfbom_32_be + utfbom_32_le + + utfbom_16_be + utfbom_16_le + + utfbom_8 +local utftype = utfbom_32_be * Cc("utf-32-be") + utfbom_32_le * Cc("utf-32-le") + + utfbom_16_be * Cc("utf-16-be") + utfbom_16_le * Cc("utf-16-le") + + utfbom_8 * Cc("utf-8") + alwaysmatched * Cc("utf-8") -- assume utf8 +local utfoffset = utfbom_32_be * Cc(4) + utfbom_32_le * Cc(4) + + utfbom_16_be * Cc(2) + utfbom_16_le * Cc(2) + + utfbom_8 * Cc(3) + Cc(0) -function string.is_empty(str) - return not find(str,"%S") -end +local utf8next = R("\128\191") -local patterns_escapes = { - ["%"] = "%%", - ["."] = "%.", - ["+"] = "%+", ["-"] = "%-", ["*"] = "%*", - ["["] = "%[", ["]"] = "%]", - ["("] = "%(", [")"] = "%)", - -- ["{"] = "%{", ["}"] = "%}" - -- ["^"] = "%^", ["$"] = "%$", -} +patterns.utf8one = R("\000\127") +patterns.utf8two = R("\194\223") * utf8next +patterns.utf8three = R("\224\239") * utf8next * utf8next +patterns.utf8four = R("\240\244") * utf8next * utf8next * utf8next +patterns.utfbom = utfbom +patterns.utftype = utftype +patterns.utfoffset = utfoffset -local simple_escapes = { - ["-"] = "%-", - ["."] = "%.", - ["?"] = ".", - ["*"] = ".*", -} +local utf8char = 
patterns.utf8one + patterns.utf8two + patterns.utf8three + patterns.utf8four +local validutf8char = utf8char^0 * endofstring * Cc(true) + Cc(false) -function string.escapedpattern(str,simple) - return (gsub(str,".",simple and simple_escapes or patterns_escapes)) -end +patterns.utf8 = utf8char +patterns.utf8char = utf8char +patterns.validutf8 = validutf8char +patterns.validutf8char = validutf8char -function string.topattern(str,lowercase,strict) - if str == "" then - return ".*" - else - str = gsub(str,".",simple_escapes) - if lowercase then - str = lower(str) - end - if strict then - return "^" .. str .. "$" - else - return str - end - end -end +local eol = S("\n\r") +local spacer = S(" \t\f\v") -- + char(0xc2, 0xa0) if we want utf (cf mail roberto) +local whitespace = eol + spacer +local nonspacer = 1 - spacer +local nonwhitespace = 1 - whitespace +patterns.eol = eol +patterns.spacer = spacer +patterns.whitespace = whitespace +patterns.nonspacer = nonspacer +patterns.nonwhitespace = nonwhitespace -function string.valid(str,default) - return (type(str) == "string" and str ~= "" and str) or default or nil -end +local stripper = spacer^0 * C((spacer^0 * nonspacer^1)^0) -- from example by roberto --- obsolete names: +----- collapser = Cs(spacer^0/"" * ((spacer^1 * P(-1) / "") + (spacer^1/" ") + P(1))^0) +local collapser = Cs(spacer^0/"" * nonspacer^0 * ((spacer^0/" " * nonspacer^1)^0)) -string.quote = string.quoted -string.unquote = string.unquoted +patterns.stripper = stripper +patterns.collapser = collapser --- handy fallback +patterns.digit = digit +patterns.sign = sign +patterns.cardinal = sign^0 * digit^1 +patterns.integer = sign^0 * digit^1 +patterns.unsigned = digit^0 * P('.') * digit^1 +patterns.float = sign^0 * patterns.unsigned +patterns.cunsigned = digit^0 * P(',') * digit^1 +patterns.cfloat = sign^0 * patterns.cunsigned +patterns.number = patterns.float + patterns.integer +patterns.cnumber = patterns.cfloat + patterns.integer +patterns.oct = P("0") * 
R("07")^1 +patterns.octal = patterns.oct +patterns.HEX = P("0x") * R("09","AF")^1 +patterns.hex = P("0x") * R("09","af")^1 +patterns.hexadecimal = P("0x") * R("09","AF","af")^1 +patterns.lowercase = R("az") +patterns.uppercase = R("AZ") +patterns.letter = patterns.lowercase + patterns.uppercase +patterns.space = space +patterns.tab = P("\t") +patterns.spaceortab = patterns.space + patterns.tab +patterns.newline = newline +patterns.emptyline = newline^1 +patterns.equal = P("=") +patterns.comma = P(",") +patterns.commaspacer = P(",") * spacer^0 +patterns.period = P(".") +patterns.colon = P(":") +patterns.semicolon = P(";") +patterns.underscore = P("_") +patterns.escaped = escaped +patterns.squote = squote +patterns.dquote = dquote +patterns.nosquote = (escaped + (1-squote))^0 +patterns.nodquote = (escaped + (1-dquote))^0 +patterns.unsingle = (squote/"") * patterns.nosquote * (squote/"") -- will change to C in the middle +patterns.undouble = (dquote/"") * patterns.nodquote * (dquote/"") -- will change to C in the middle +patterns.unquoted = patterns.undouble + patterns.unsingle -- more often undouble +patterns.unspacer = ((patterns.spacer^1)/"")^0 -string.itself = function(s) return s end +patterns.singlequoted = squote * patterns.nosquote * squote +patterns.doublequoted = dquote * patterns.nodquote * dquote +patterns.quoted = patterns.doublequoted + patterns.singlequoted --- also handy (see utf variant) +patterns.propername = R("AZ","az","__") * R("09","AZ","az", "__")^0 * P(-1) -local pattern = Ct(C(1)^0) -- string and not utf ! 
+patterns.somecontent = (anything - newline - space)^1 -- (utf8char - newline - space)^1 +patterns.beginline = #(1-newline) -function string.totable(str) - return lpegmatch(pattern,str) +local function anywhere(pattern) --slightly adapted from website + return P { P(pattern) + 1 * V(1) } end -end -- closure - -do -- begin closure to overcome local limits and interference - -if not modules then modules = { } end modules ['l-table'] = { - version = 1.001, - comment = "companion to luat-lib.mkiv", - author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", - copyright = "PRAGMA ADE / ConTeXt Development Team", - license = "see context related readme files" -} - -local type, next, tostring, tonumber, ipairs = type, next, tostring, tonumber, ipairs -local table, string = table, string -local concat, sort, insert, remove = table.concat, table.sort, table.insert, table.remove -local format, find, gsub, lower, dump, match = string.format, string.find, string.gsub, string.lower, string.dump, string.match -local getmetatable, setmetatable = getmetatable, setmetatable -local getinfo = debug.getinfo - --- Starting with version 5.2 Lua no longer provide ipairs, which makes --- sense. As we already used the for loop and # in most places the --- impact on ConTeXt was not that large; the remaining ipairs already --- have been replaced. In a similar fashion we also hardly used pairs. --- --- Hm, actually ipairs was retained, but we no longer use it anyway. --- --- Just in case, we provide the fallbacks as discussed in Programming --- in Lua (http://www.lua.org/pil/7.3.html): - -if not ipairs then - - -- for k, v in ipairs(t) do ... end - -- for k=1,#t do local v = t[k] ... 
end +lpeg.anywhere = anywhere - local function iterate(a,i) - i = i + 1 - local v = a[i] - if v ~= nil then - return i, v --, nil - end +function lpeg.instringchecker(p) + p = anywhere(p) + return function(str) + return lpegmatch(p,str) and true or false end +end - function ipairs(a) - return iterate, a, 0 - end +function lpeg.splitter(pattern, action) + return (((1-P(pattern))^1)/action+1)^0 +end +function lpeg.tsplitter(pattern, action) + return Ct((((1-P(pattern))^1)/action+1)^0) end -if not pairs then +-- probleem: separator can be lpeg and that does not hash too well, but +-- it's quite okay as the key is then not garbage collected - -- for k, v in pairs(t) do ... end - -- for k, v in next, t do ... end +local splitters_s, splitters_m, splitters_t = { }, { }, { } - function pairs(t) - return next, t -- , nil +local function splitat(separator,single) + local splitter = (single and splitters_s[separator]) or splitters_m[separator] + if not splitter then + separator = P(separator) + local other = C((1 - separator)^0) + if single then + local any = anything + splitter = other * (separator * C(any^0) + "") -- ? + splitters_s[separator] = splitter + else + splitter = other * (separator * other)^0 + splitters_m[separator] = splitter + end end - + return splitter end --- Also, unpack has been moved to the table table, and for compatiility --- reasons we provide both now. 
- -if not table.unpack then - table.unpack = _G.unpack -elseif not unpack then - _G.unpack = table.unpack +local function tsplitat(separator) + local splitter = splitters_t[separator] + if not splitter then + splitter = Ct(splitat(separator)) + splitters_t[separator] = splitter + end + return splitter end --- extra functions, some might go (when not used) +lpeg.splitat = splitat +lpeg.tsplitat = tsplitat -function table.strip(tab) - local lst, l = { }, 0 - for i=1,#tab do - local s = gsub(tab[i],"^%s*(.-)%s*$","%1") - if s == "" then - -- skip this one - else - l = l + 1 - lst[l] = s - end +function string.splitup(str,separator) + if not separator then + separator = "," end - return lst + return lpegmatch(splitters_m[separator] or splitat(separator),str) end -function table.keys(t) - if t then - local keys, k = { }, 0 - for key, _ in next, t do - k = k + 1 - keys[k] = key - end - return keys - else - return { } - end -end +-- local p = splitat("->",false) print(lpegmatch(p,"oeps->what->more")) -- oeps what more +-- local p = splitat("->",true) print(lpegmatch(p,"oeps->what->more")) -- oeps what->more +-- local p = splitat("->",false) print(lpegmatch(p,"oeps")) -- oeps +-- local p = splitat("->",true) print(lpegmatch(p,"oeps")) -- oeps -local function compare(a,b) - local ta, tb = type(a), type(b) -- needed, else 11 < 2 - if ta == tb then - return a < b - else - return tostring(a) < tostring(b) - end -end +local cache = { } -local function sortedkeys(tab) - if tab then - local srt, category, s = { }, 0, 0 -- 0=unknown 1=string, 2=number 3=mixed - for key,_ in next, tab do - s = s + 1 - srt[s] = key - if category == 3 then - -- no further check - else - local tkey = type(key) - if tkey == "string" then - category = (category == 2 and 3) or 1 - elseif tkey == "number" then - category = (category == 1 and 3) or 2 - else - category = 3 - end - end - end - if category == 0 or category == 3 then - sort(srt,compare) - else - sort(srt) - end - return srt - else - return { } 
+function lpeg.split(separator,str) + local c = cache[separator] + if not c then + c = tsplitat(separator) + cache[separator] = c end + return lpegmatch(c,str) end -local function sortedhashkeys(tab) -- fast one - if tab then - local srt, s = { }, 0 - for key,_ in next, tab do - if key then - s= s + 1 - srt[s] = key - end +function string.split(str,separator) + if separator then + local c = cache[separator] + if not c then + c = tsplitat(separator) + cache[separator] = c end - sort(srt) - return srt + return lpegmatch(c,str) else - return { } + return { str } end end -function table.allkeys(t) - local keys = { } - for i=1,#t do - for k, v in next, t[i] do - keys[k] = true - end - end - return sortedkeys(keys) -end +local spacing = patterns.spacer^0 * newline -- sort of strip +local empty = spacing * Cc("") +local nonempty = Cs((1-spacing)^1) * spacing^-1 +local content = (empty + nonempty)^1 -table.sortedkeys = sortedkeys -table.sortedhashkeys = sortedhashkeys +patterns.textline = content -local function nothing() end +local linesplitter = tsplitat(newline) -local function sortedhash(t) - if t then - local n, s = 0, sortedkeys(t) -- the robust one - local function kv(s) - n = n + 1 - local k = s[n] - return k, t[k] - end - return kv, s - else - return nothing - end +patterns.linesplitter = linesplitter + +function string.splitlines(str) + return lpegmatch(linesplitter,str) end -table.sortedhash = sortedhash -table.sortedpairs = sortedhash +-- lpeg.splitters = cache -- no longer public -function table.append(t,list) - local n = #t - for i=1,#list do - n = n + 1 - t[n] = list[i] - end - return t -end +local cache = { } -function table.prepend(t, list) - local nl = #list - local nt = nl + #t - for i=#t,1,-1 do - t[nt] = t[i] - nt = nt - 1 - end - for i=1,#list do - t[i] = list[i] +function lpeg.checkedsplit(separator,str) + local c = cache[separator] + if not c then + separator = P(separator) + local other = C((1 - separator)^1) + c = Ct(separator^0 * other * 
(separator^1 * other)^0) + cache[separator] = c end - return t + return lpegmatch(c,str) end -function table.merge(t, ...) -- first one is target - t = t or { } - local lst = { ... } - for i=1,#lst do - for k, v in next, lst[i] do - t[k] = v - end +function string.checkedsplit(str,separator) + local c = cache[separator] + if not c then + separator = P(separator) + local other = C((1 - separator)^1) + c = Ct(separator^0 * other * (separator^1 * other)^0) + cache[separator] = c end - return t + return lpegmatch(c,str) end -function table.merged(...) - local tmp, lst = { }, { ... } - for i=1,#lst do - for k, v in next, lst[i] do - tmp[k] = v +-- from roberto's site: + +local function f2(s) local c1, c2 = byte(s,1,2) return c1 * 64 + c2 - 12416 end +local function f3(s) local c1, c2, c3 = byte(s,1,3) return (c1 * 64 + c2) * 64 + c3 - 925824 end +local function f4(s) local c1, c2, c3, c4 = byte(s,1,4) return ((c1 * 64 + c2) * 64 + c3) * 64 + c4 - 63447168 end + +local utf8byte = patterns.utf8one/byte + patterns.utf8two/f2 + patterns.utf8three/f3 + patterns.utf8four/f4 + +patterns.utf8byte = utf8byte + +--~ local str = " a b c d " + +--~ local s = lpeg.stripper(lpeg.R("az")) print("["..lpegmatch(s,str).."]") +--~ local s = lpeg.keeper(lpeg.R("az")) print("["..lpegmatch(s,str).."]") +--~ local s = lpeg.stripper("ab") print("["..lpegmatch(s,str).."]") +--~ local s = lpeg.keeper("ab") print("["..lpegmatch(s,str).."]") + +local cache = { } + +function lpeg.stripper(str) + if type(str) == "string" then + local s = cache[str] + if not s then + s = Cs(((S(str)^1)/"" + 1)^0) + cache[str] = s end + return s + else + return Cs(((str^1)/"" + 1)^0) end - return tmp end -function table.imerge(t, ...) - local lst, nt = { ... 
}, #t - for i=1,#lst do - local nst = lst[i] - for j=1,#nst do - nt = nt + 1 - t[nt] = nst[j] +local cache = { } + +function lpeg.keeper(str) + if type(str) == "string" then + local s = cache[str] + if not s then + s = Cs((((1-S(str))^1)/"" + 1)^0) + cache[str] = s end + return s + else + return Cs((((1-str)^1)/"" + 1)^0) end - return t end -function table.imerged(...) - local tmp, ntmp, lst = { }, 0, {...} - for i=1,#lst do - local nst = lst[i] - for j=1,#nst do - ntmp = ntmp + 1 - tmp[ntmp] = nst[j] - end - end - return tmp +function lpeg.frontstripper(str) -- or pattern (yet undocumented) + return (P(str) + P(true)) * Cs(anything^0) end -local function fastcopy(old,metatabletoo) -- fast one - if old then - local new = { } - for k,v in next, old do - if type(v) == "table" then - new[k] = fastcopy(v,metatabletoo) -- was just table.copy - else - new[k] = v - end - end - if metatabletoo then - -- optional second arg - local mt = getmetatable(old) - if mt then - setmetatable(new,mt) - end - end - return new - else - return { } - end +function lpeg.endstripper(str) -- or pattern (yet undocumented) + return Cs((1 - P(str) * endofstring)^0) end --- todo : copy without metatable +-- Just for fun I looked at the used bytecode and +-- p = (p and p + pp) or pp gets one more (testset). 
-local function copy(t, tables) -- taken from lua wiki, slightly adapted - tables = tables or { } - local tcopy = {} - if not tables[t] then - tables[t] = tcopy - end - for i,v in next, t do -- brrr, what happens with sparse indexed - if type(i) == "table" then - if tables[i] then - i = tables[i] - else - i = copy(i, tables) +-- todo: cache when string + +function lpeg.replacer(one,two,makefunction,isutf) -- in principle we should sort the keys + local pattern + local u = isutf and utf8char or 1 + if type(one) == "table" then + local no = #one + local p = P(false) + if no == 0 then + for k, v in next, one do + p = p + P(k) / v + end + pattern = Cs((p + u)^0) + elseif no == 1 then + local o = one[1] + one, two = P(o[1]), o[2] + -- pattern = Cs(((1-one)^1 + one/two)^0) + pattern = Cs((one/two + u)^0) + else + for i=1,no do + local o = one[i] + p = p + P(o[1]) / o[2] end + pattern = Cs((p + u)^0) end - if type(v) ~= "table" then - tcopy[i] = v - elseif tables[v] then - tcopy[i] = tables[v] + else + pattern = Cs((P(one)/(two or "") + u)^0) + end + if makefunction then + return function(str) + return lpegmatch(pattern,str) + end + else + return pattern + end +end + +function lpeg.finder(lst,makefunction) + local pattern + if type(lst) == "table" then + pattern = P(false) + if #lst == 0 then + for k, v in next, lst do + pattern = pattern + P(k) -- ignore key, so we can use a replacer table + end else - tcopy[i] = copy(v, tables) + for i=1,#lst do + pattern = pattern + P(lst[i]) + end end + else + pattern = P(lst) end - local mt = getmetatable(t) - if mt then - setmetatable(tcopy,mt) + pattern = (1-pattern)^0 * pattern + if makefunction then + return function(str) + return lpegmatch(pattern,str) + end + else + return pattern end - return tcopy end -table.fastcopy = fastcopy -table.copy = copy +-- print(lpeg.match(lpeg.replacer("e","a"),"test test")) +-- print(lpeg.match(lpeg.replacer{{"e","a"}},"test test")) +-- print(lpeg.match(lpeg.replacer({ e = "a", t = "x" }),"test 
test")) -function table.derive(parent) - local child = { } - if parent then - setmetatable(child,{ __index = parent }) +local splitters_f, splitters_s = { }, { } + +function lpeg.firstofsplit(separator) -- always return value + local splitter = splitters_f[separator] + if not splitter then + separator = P(separator) + splitter = C((1 - separator)^0) + splitters_f[separator] = splitter end - return child + return splitter end -function table.tohash(t,value) - local h = { } - if t then - if value == nil then value = true end - for _, v in next, t do -- no ipairs here - h[v] = value - end +function lpeg.secondofsplit(separator) -- nil if not split + local splitter = splitters_s[separator] + if not splitter then + separator = P(separator) + splitter = (1 - separator)^0 * separator * C(anything^0) + splitters_s[separator] = splitter end - return h + return splitter end -function table.fromhash(t) - local hsh, h = { }, 0 - for k, v in next, t do -- no ipairs here - if v then - h = h + 1 - hsh[h] = k - end +function lpeg.balancer(left,right) + left, right = P(left), P(right) + return P { left * ((1 - left - right) + V(1))^0 * right } +end + +-- print(1,lpegmatch(lpeg.firstofsplit(":"),"bc:de")) +-- print(2,lpegmatch(lpeg.firstofsplit(":"),":de")) -- empty +-- print(3,lpegmatch(lpeg.firstofsplit(":"),"bc")) +-- print(4,lpegmatch(lpeg.secondofsplit(":"),"bc:de")) +-- print(5,lpegmatch(lpeg.secondofsplit(":"),"bc:")) -- empty +-- print(6,lpegmatch(lpeg.secondofsplit(":",""),"bc")) +-- print(7,lpegmatch(lpeg.secondofsplit(":"),"bc")) +-- print(9,lpegmatch(lpeg.secondofsplit(":","123"),"bc")) + +-- -- slower: +-- +-- function lpeg.counter(pattern) +-- local n, pattern = 0, (lpeg.P(pattern)/function() n = n + 1 end + lpeg.anything)^0 +-- return function(str) n = 0 ; lpegmatch(pattern,str) ; return n end +-- end + +local nany = utf8char/"" + +function lpeg.counter(pattern) + pattern = Cs((P(pattern)/" " + nany)^0) + return function(str) + return #lpegmatch(pattern,str) end - 
return hsh end -local noquotes, hexify, handle, reduce, compact, inline, functions +-- utf extensies -local reserved = table.tohash { -- intercept a language inconvenience: no reserved words as key - 'and', 'break', 'do', 'else', 'elseif', 'end', 'false', 'for', 'function', 'if', - 'in', 'local', 'nil', 'not', 'or', 'repeat', 'return', 'then', 'true', 'until', 'while', -} +local utfcharacters = utf and utf.characters or string.utfcharacters +local utfgmatch = unicode and unicode.utf8.gmatch +local utfchar = utf and utf.char or (unicode and unicode.utf8 and unicode.utf8.char) -local function simple_table(t) - if #t > 0 then - local n = 0 - for _,v in next, t do - n = n + 1 +lpeg.UP = lpeg.P + +if utfcharacters then + + function lpeg.US(str) + local p = P(false) + for uc in utfcharacters(str) do + p = p + P(uc) end - if n == #t then - local tt, nt = { }, 0 - for i=1,#t do - local v = t[i] - local tv = type(v) - if tv == "number" then - nt = nt + 1 - if hexify then - tt[nt] = format("0x%04X",v) - else - tt[nt] = tostring(v) -- tostring not needed - end - elseif tv == "boolean" then - nt = nt + 1 - tt[nt] = tostring(v) - elseif tv == "string" then - nt = nt + 1 - tt[nt] = format("%q",v) - else - tt = nil - break - end - end - return tt + return p + end + + +elseif utfgmatch then + + function lpeg.US(str) + local p = P(false) + for uc in utfgmatch(str,".") do + p = p + P(uc) end + return p end - return nil -end --- Because this is a core function of mkiv I moved some function calls --- inline. 
--- --- twice as fast in a test: --- --- local propername = lpeg.P(lpeg.R("AZ","az","__") * lpeg.R("09","AZ","az", "__")^0 * lpeg.P(-1) ) +else --- problem: there no good number_to_string converter with the best resolution + function lpeg.US(str) + local p = P(false) + local f = function(uc) + p = p + P(uc) + end + lpegmatch((utf8char/f)^0,str) + return p + end -local function dummy() end +end -local function do_serialize(root,name,depth,level,indexed) - if level > 0 then - depth = depth .. " " - if indexed then - handle(format("%s{",depth)) - else - local tn = type(name) - if tn == "number" then -- or find(k,"^%d+$") then - if hexify then - handle(format("%s[0x%04X]={",depth,name)) - else - handle(format("%s[%s]={",depth,name)) - end - elseif tn == "string" then - if noquotes and not reserved[name] and find(name,"^%a[%w%_]*$") then - handle(format("%s%s={",depth,name)) - else - handle(format("%s[%q]={",depth,name)) - end - elseif tn == "boolean" then - handle(format("%s[%s]={",depth,tostring(name))) - else - handle(format("%s{",depth)) - end +local range = utf8byte * utf8byte + Cc(false) -- utf8byte is already a capture + +function lpeg.UR(str,more) + local first, last + if type(str) == "number" then + first = str + last = more or first + else + first, last = lpegmatch(range,str) + if not last then + return P(str) end end - -- we could check for k (index) being number (cardinal) - if root and next(root) then - -- local first, last = nil, 0 -- #root cannot be trusted here (will be ok in 5.2 when ipairs is gone) - -- if compact then - -- -- NOT: for k=1,#root do (we need to quit at nil) - -- for k,v in ipairs(root) do -- can we use next? 
- -- if not first then first = k end - -- last = last + 1 - -- end - -- end - local first, last = nil, 0 - if compact then - last = #root - for k=1,last do --- if not root[k] then - if root[k] == nil then - last = k - 1 - break - end - end - if last > 0 then - first = 1 - end + if first == last then + return P(str) + elseif utfchar and (last - first < 8) then -- a somewhat arbitrary criterium + local p = P(false) + for i=first,last do + p = p + P(utfchar(i)) end - local sk = sortedkeys(root) - for i=1,#sk do - local k = sk[i] - local v = root[k] - --~ if v == root then - -- circular - --~ else - local t, tk = type(v), type(k) - if compact and first and tk == "number" and k >= first and k <= last then - if t == "number" then - if hexify then - handle(format("%s 0x%04X,",depth,v)) - else - handle(format("%s %s,",depth,v)) -- %.99g - end - elseif t == "string" then - if reduce and tonumber(v) then - handle(format("%s %s,",depth,v)) - else - handle(format("%s %q,",depth,v)) - end - elseif t == "table" then - if not next(v) then - handle(format("%s {},",depth)) - elseif inline then -- and #t > 0 - local st = simple_table(v) - if st then - handle(format("%s { %s },",depth,concat(st,", "))) - else - do_serialize(v,k,depth,level+1,true) - end - else - do_serialize(v,k,depth,level+1,true) - end - elseif t == "boolean" then - handle(format("%s %s,",depth,tostring(v))) - elseif t == "function" then - if functions then - handle(format('%s loadstring(%q),',depth,dump(v))) - else - handle(format('%s "function",',depth)) - end - else - handle(format("%s %q,",depth,tostring(v))) - end - elseif k == "__p__" then -- parent - if false then - handle(format("%s __p__=nil,",depth)) - end - elseif t == "number" then - if tk == "number" then -- or find(k,"^%d+$") then - if hexify then - handle(format("%s [0x%04X]=0x%04X,",depth,k,v)) - else - handle(format("%s [%s]=%s,",depth,k,v)) -- %.99g - end - elseif tk == "boolean" then - if hexify then - handle(format("%s 
[%s]=0x%04X,",depth,tostring(k),v)) - else - handle(format("%s [%s]=%s,",depth,tostring(k),v)) -- %.99g - end - elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then - if hexify then - handle(format("%s %s=0x%04X,",depth,k,v)) - else - handle(format("%s %s=%s,",depth,k,v)) -- %.99g - end - else - if hexify then - handle(format("%s [%q]=0x%04X,",depth,k,v)) - else - handle(format("%s [%q]=%s,",depth,k,v)) -- %.99g - end - end - elseif t == "string" then - if reduce and tonumber(v) then - if tk == "number" then -- or find(k,"^%d+$") then - if hexify then - handle(format("%s [0x%04X]=%s,",depth,k,v)) - else - handle(format("%s [%s]=%s,",depth,k,v)) - end - elseif tk == "boolean" then - handle(format("%s [%s]=%s,",depth,tostring(k),v)) - elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then - handle(format("%s %s=%s,",depth,k,v)) - else - handle(format("%s [%q]=%s,",depth,k,v)) - end - else - if tk == "number" then -- or find(k,"^%d+$") then - if hexify then - handle(format("%s [0x%04X]=%q,",depth,k,v)) - else - handle(format("%s [%s]=%q,",depth,k,v)) - end - elseif tk == "boolean" then - handle(format("%s [%s]=%q,",depth,tostring(k),v)) - elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then - handle(format("%s %s=%q,",depth,k,v)) - else - handle(format("%s [%q]=%q,",depth,k,v)) - end - end - elseif t == "table" then - if not next(v) then - if tk == "number" then -- or find(k,"^%d+$") then - if hexify then - handle(format("%s [0x%04X]={},",depth,k)) - else - handle(format("%s [%s]={},",depth,k)) - end - elseif tk == "boolean" then - handle(format("%s [%s]={},",depth,tostring(k))) - elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then - handle(format("%s %s={},",depth,k)) - else - handle(format("%s [%q]={},",depth,k)) - end - elseif inline then - local st = simple_table(v) - if st then - if tk == "number" then -- or find(k,"^%d+$") then - if hexify then - handle(format("%s [0x%04X]={ %s },",depth,k,concat(st,", "))) 
- else - handle(format("%s [%s]={ %s },",depth,k,concat(st,", "))) - end - elseif tk == "boolean" then -- or find(k,"^%d+$") then - handle(format("%s [%s]={ %s },",depth,tostring(k),concat(st,", "))) - elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then - handle(format("%s %s={ %s },",depth,k,concat(st,", "))) - else - handle(format("%s [%q]={ %s },",depth,k,concat(st,", "))) - end - else - do_serialize(v,k,depth,level+1) - end + return p -- nil when invalid range + else + local f = function(b) + return b >= first and b <= last + end + -- tricky, these nested captures + return utf8byte / f -- nil when invalid range + end +end + +-- print(lpeg.match(lpeg.Cs((C(lpeg.UR("αω"))/{ ["χ"] = "OEPS" })^0),"αωχαω")) + +-- lpeg.print(lpeg.R("ab","cd","gh")) +-- lpeg.print(lpeg.P("a","b","c")) +-- lpeg.print(lpeg.S("a","b","c")) + +-- print(lpeg.count("äáàa",lpeg.P("á") + lpeg.P("à"))) +-- print(lpeg.count("äáàa",lpeg.UP("áà"))) +-- print(lpeg.count("äáàa",lpeg.US("àá"))) +-- print(lpeg.count("äáàa",lpeg.UR("aá"))) +-- print(lpeg.count("äáàa",lpeg.UR("àá"))) +-- print(lpeg.count("äáàa",lpeg.UR(0x0000,0xFFFF))) + +function lpeg.is_lpeg(p) + return p and lpegtype(p) == "pattern" +end + +function lpeg.oneof(list,...) -- lpeg.oneof("elseif","else","if","then") -- assume proper order + if type(list) ~= "table" then + list = { list, ... } + end + -- table.sort(list) -- longest match first + local p = P(list[1]) + for l=2,#list do + p = p + P(list[l]) + end + return p +end + +-- For the moment here, but it might move to utilities. Beware, we need to +-- have the longest keyword first, so 'aaa' comes beforte 'aa' which is why we +-- loop back from the end cq. prepend. 
+ +local sort = table.sort + +local function copyindexed(old) + local new = { } + for i=1,#old do + new[i] = old + end + return new +end + +local function sortedkeys(tab) + local keys, s = { }, 0 + for key,_ in next, tab do + s = s + 1 + keys[s] = key + end + sort(keys) + return keys +end + +function lpeg.append(list,pp,delayed,checked) + local p = pp + if #list > 0 then + local keys = copyindexed(list) + sort(keys) + for i=#keys,1,-1 do + local k = keys[i] + if p then + p = P(k) + p + else + p = P(k) + end + end + elseif delayed then -- hm, it looks like the lpeg parser resolves anyway + local keys = sortedkeys(list) + if p then + for i=1,#keys,1 do + local k = keys[i] + local v = list[k] + p = P(k)/list + p + end + else + for i=1,#keys do + local k = keys[i] + local v = list[k] + if p then + p = P(k) + p else - do_serialize(v,k,depth,level+1) + p = P(k) end - elseif t == "boolean" then - if tk == "number" then -- or find(k,"^%d+$") then - if hexify then - handle(format("%s [0x%04X]=%s,",depth,k,tostring(v))) - else - handle(format("%s [%s]=%s,",depth,k,tostring(v))) - end - elseif tk == "boolean" then -- or find(k,"^%d+$") then - handle(format("%s [%s]=%s,",depth,tostring(k),tostring(v))) - elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then - handle(format("%s %s=%s,",depth,k,tostring(v))) + end + if p then + p = p / list + end + end + elseif checked then + -- problem: substitution gives a capture + local keys = sortedkeys(list) + for i=1,#keys do + local k = keys[i] + local v = list[k] + if p then + if k == v then + p = P(k) + p else - handle(format("%s [%q]=%s,",depth,k,tostring(v))) - end - elseif t == "function" then - if functions then - local f = getinfo(v).what == "C" and dump(dummy) or dump(v) - -- local f = getinfo(v).what == "C" and dump(function(...) return v(...) 
end) or dump(v) - if tk == "number" then -- or find(k,"^%d+$") then - if hexify then - handle(format("%s [0x%04X]=loadstring(%q),",depth,k,f)) - else - handle(format("%s [%s]=loadstring(%q),",depth,k,f)) - end - elseif tk == "boolean" then - handle(format("%s [%s]=loadstring(%q),",depth,tostring(k),f)) - elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then - handle(format("%s %s=loadstring(%q),",depth,k,f)) - else - handle(format("%s [%q]=loadstring(%q),",depth,k,f)) - end + p = P(k)/v + p end else - if tk == "number" then -- or find(k,"^%d+$") then - if hexify then - handle(format("%s [0x%04X]=%q,",depth,k,tostring(v))) - else - handle(format("%s [%s]=%q,",depth,k,tostring(v))) - end - elseif tk == "boolean" then -- or find(k,"^%d+$") then - handle(format("%s [%s]=%q,",depth,tostring(k),tostring(v))) - elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then - handle(format("%s %s=%q,",depth,k,tostring(v))) + if k == v then + p = P(k) else - handle(format("%s [%q]=%q,",depth,k,tostring(v))) + p = P(k)/v end end - --~ end + end + else + local keys = sortedkeys(list) + for i=1,#keys do + local k = keys[i] + local v = list[k] + if p then + p = P(k)/v + p + else + p = P(k)/v + end end end - if level > 0 then - handle(format("%s},",depth)) - end + return p end --- replacing handle by a direct t[#t+1] = ... 
(plus test) is not much --- faster (0.03 on 1.00 for zapfino.tma) - -local function serialize(_handle,root,name,specification) -- handle wins - local tname = type(name) - if type(specification) == "table" then - noquotes = specification.noquotes - hexify = specification.hexify - handle = _handle or specification.handle or print - reduce = specification.reduce or false - functions = specification.functions - compact = specification.compact - inline = specification.inline and compact - if functions == nil then - functions = true - end - if compact == nil then - compact = true - end - if inline == nil then - inline = compact - end - else - noquotes = false - hexify = false - handle = _handle or print - reduce = false - compact = true - inline = true - functions = true - end - if tname == "string" then - if name == "return" then - handle("return {") - else - handle(name .. "={") - end - elseif tname == "number" then - if hexify then - handle(format("[0x%04X]={",name)) - else - handle("[" .. name .. "]={") - end - elseif tname == "boolean" then - if name then - handle("return {") - else - handle("{") - end - else - handle("t={") - end - if root then - -- The dummy access will initialize a table that has a delayed initialization - -- using a metatable. (maybe explicitly test for metatable) - if getmetatable(root) then -- todo: make this an option, maybe even per subtable - local dummy = root._w_h_a_t_e_v_e_r_ - root._w_h_a_t_e_v_e_r_ = nil - end - -- Let's forget about empty tables. 
- if next(root) then - do_serialize(root,name,"",0) - end - end - handle("}") -end - ---~ name: ---~ ---~ true : return { } ---~ false : { } ---~ nil : t = { } ---~ string : string = { } ---~ 'return' : return { } ---~ number : [number] = { } - -function table.serialize(root,name,specification) - local t, n = { }, 0 - local function flush(s) - n = n + 1 - t[n] = s - end - serialize(flush,root,name,specification) - return concat(t,"\n") -end - -table.tohandle = serialize +-- inspect(lpeg.append({ a = "1", aa = "1", aaa = "1" } ,nil,true)) +-- inspect(lpeg.append({ ["degree celsius"] = "1", celsius = "1", degree = "1" } ,nil,true)) --- sometimes tables are real use (zapfino extra pro is some 85M) in which --- case a stepwise serialization is nice; actually, we could consider: --- --- for line in table.serializer(root,name,reduce,noquotes) do --- ...(line) +-- function lpeg.exact_match(words,case_insensitive) +-- local pattern = concat(words) +-- if case_insensitive then +-- local pattern = S(upper(characters)) + S(lower(characters)) +-- local list = { } +-- for i=1,#words do +-- list[lower(words[i])] = true +-- end +-- return Cmt(pattern^1, function(_,i,s) +-- return list[lower(s)] and i +-- end) +-- else +-- local pattern = S(concat(words)) +-- local list = { } +-- for i=1,#words do +-- list[words[i]] = true +-- end +-- return Cmt(pattern^1, function(_,i,s) +-- return list[s] and i +-- end) +-- end -- end --- --- so this is on the todo list - -local maxtab = 2*1024 -function table.tofile(filename,root,name,specification) - local f = io.open(filename,'w') - if f then - if maxtab > 1 then - local t, n = { }, 0 - local function flush(s) - n = n + 1 - t[n] = s - if n > maxtab then - f:write(concat(t,"\n"),"\n") -- hm, write(sometable) should be nice - t, n = { }, 0 -- we could recycle t if needed - end - end - serialize(flush,root,name,specification) - f:write(concat(t,"\n"),"\n") - else - local function flush(s) - f:write(s,"\n") - end - 
serialize(flush,root,name,specification) - end - f:close() - io.flush() - end -end +-- experiment: -local function flattened(t,f,depth) - if f == nil then - f = { } - depth = 0xFFFF - elseif tonumber(f) then - -- assume that only two arguments are given - depth = f - f = { } - elseif not depth then - depth = 0xFFFF - end - for k, v in next, t do - if type(k) ~= "number" then - if depth > 0 and type(v) == "table" then - flattened(v,f,depth-1) - else - f[k] = v - end - end - end - local n = #f - for k=1,#t do +local function make(t) + local p + local keys = sortedkeys(t) + for i=1,#keys do + local k = keys[i] local v = t[k] - if depth > 0 and type(v) == "table" then - flattened(v,f,depth-1) - n = #f - else - n = n + 1 - f[n] = v - end - end - return f -end - -table.flattened = flattened - -local function unnest(t,f) -- only used in mk, for old times sake - if not f then -- and only relevant for token lists - f = { } - end - for i=1,#t do - local v = t[i] - if type(v) == "table" then - if type(v[1]) == "table" then - unnest(v,f) + if not p then + if next(v) then + p = P(k) * make(v) else - f[#f+1] = v + p = P(k) end else - f[#f+1] = v - end - end - return f -end - -function table.unnest(t) -- bad name - return unnest(t) -end - -local function are_equal(a,b,n,m) -- indexed - if a and b and #a == #b then - n = n or 1 - m = m or #a - for i=n,m do - local ai, bi = a[i], b[i] - if ai==bi then - -- same - elseif type(ai)=="table" and type(bi)=="table" then - if not are_equal(ai,bi) then - return false - end + if next(v) then + p = p + P(k) * make(v) else - return false + p = p + P(k) end end - return true - else - return false end + return p end -local function identical(a,b) -- assumes same structure - for ka, va in next, a do - local vb = b[ka] - if va == vb then - -- same - elseif type(va) == "table" and type(vb) == "table" then - if not identical(va,vb) then - return false +function lpeg.utfchartabletopattern(list) -- goes to util-lpg + local tree = { } + for i=1,#list 
do + local t = tree + for c in gmatch(list[i],".") do + if not t[c] then + t[c] = { } end - else - return false + t = t[c] end end - return true + return make(tree) end -table.identical = identical -table.are_equal = are_equal +-- inspect ( lpeg.utfchartabletopattern { +-- utfchar(0x00A0), -- nbsp +-- utfchar(0x2000), -- enquad +-- utfchar(0x2001), -- emquad +-- utfchar(0x2002), -- enspace +-- utfchar(0x2003), -- emspace +-- utfchar(0x2004), -- threeperemspace +-- utfchar(0x2005), -- fourperemspace +-- utfchar(0x2006), -- sixperemspace +-- utfchar(0x2007), -- figurespace +-- utfchar(0x2008), -- punctuationspace +-- utfchar(0x2009), -- breakablethinspace +-- utfchar(0x200A), -- hairspace +-- utfchar(0x200B), -- zerowidthspace +-- utfchar(0x202F), -- narrownobreakspace +-- utfchar(0x205F), -- math thinspace +-- } ) --- maybe also make a combined one +-- a few handy ones: +-- +-- faster than find(str,"[\n\r]") when match and # > 7 and always faster when # > 3 -function table.compact(t) - if t then - for k,v in next, t do - if not next(v) then - t[k] = nil - end - end - end +patterns.containseol = lpeg.finder(eol) -- (1-eol)^0 * eol + +end -- closure + +do -- begin closure to overcome local limits and interference + +if not modules then modules = { } end modules ['l-functions'] = { + version = 1.001, + comment = "companion to luat-lib.mkiv", + author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", + copyright = "PRAGMA ADE / ConTeXt Development Team", + license = "see context related readme files" +} + +functions = functions or { } + +function functions.dummy() end + +end -- closure + +do -- begin closure to overcome local limits and interference + +if not modules then modules = { } end modules ['l-string'] = { + version = 1.001, + comment = "companion to luat-lib.mkiv", + author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", + copyright = "PRAGMA ADE / ConTeXt Development Team", + license = "see context related readme files" +} + +local string = string +local sub, gmatch, format, 
char, byte, rep, lower = string.sub, string.gmatch, string.format, string.char, string.byte, string.rep, string.lower +local lpegmatch, patterns = lpeg.match, lpeg.patterns +local P, S, C, Ct, Cc, Cs = lpeg.P, lpeg.S, lpeg.C, lpeg.Ct, lpeg.Cc, lpeg.Cs + +-- Some functions are already defined in l-lpeg and maybe some from here will +-- move there (unless we also expose caches). + +-- if not string.split then +-- +-- function string.split(str,pattern) +-- local t = { } +-- if #str > 0 then +-- local n = 1 +-- for s in gmatch(str..pattern,"(.-)"..pattern) do +-- t[n] = s +-- n = n + 1 +-- end +-- end +-- return t +-- end +-- +-- end + +-- function string.unquoted(str) +-- return (gsub(str,"^([\"\'])(.*)%1$","%2")) -- interesting pattern +-- end + +local unquoted = patterns.squote * C(patterns.nosquote) * patterns.squote + + patterns.dquote * C(patterns.nodquote) * patterns.dquote + +function string.unquoted(str) + return lpegmatch(unquoted,str) or str end -function table.contains(t, v) - if t then - for i=1, #t do - if t[i] == v then - return i - end - end - end - return false +-- print(string.unquoted("test")) +-- print(string.unquoted([["t\"est"]])) +-- print(string.unquoted([["t\"est"x]])) +-- print(string.unquoted("\'test\'")) +-- print(string.unquoted('"test"')) +-- print(string.unquoted('"test"')) + +function string.quoted(str) + return format("%q",str) -- always " end -function table.count(t) +function string.count(str,pattern) -- variant 3 local n = 0 - for k, v in next, t do + for _ in gmatch(str,pattern) do -- not for utf n = n + 1 end return n end -function table.swapped(t,s) -- hash - local n = { } - if s then - for k, v in next, s do - n[k] = v - end - end - for k, v in next, t do - n[v] = k +function string.limit(str,n,sentinel) -- not utf proof + if #str > n then + sentinel = sentinel or "..." + return sub(str,1,(n-#sentinel)) .. 
sentinel + else + return str end - return n end -function table.mirror(t) -- hash - local n = { } - for k, v in next, t do - n[v] = k - n[k] = v - end - return n -end +local stripper = patterns.stripper +local collapser = patterns.collapser -function table.reversed(t) - if t then - local tt, tn = { }, #t - if tn > 0 then - local ttn = 0 - for i=tn,1,-1 do - ttn = ttn + 1 - tt[ttn] = t[i] - end - end - return tt - end +function string.strip(str) + return lpegmatch(stripper,str) or "" end -function table.sequenced(t,sep) -- hash only - if t then - local s, n = { }, 0 - for k, v in sortedhash(t) do - if simple then - if v == true then - n = n + 1 - s[n] = k - elseif v and v~= "" then - n = n + 1 - s[n] = k .. "=" .. tostring(v) - end - else - n = n + 1 - s[n] = k .. "=" .. tostring(v) - end - end - return concat(s, sep or " | ") - else - return "" - end +function string.collapsespaces(str) + return lpegmatch(collapser,str) or "" end -function table.print(t,...) - if type(t) ~= "table" then - print(tostring(t)) +-- function string.is_empty(str) +-- return not find(str,"%S") +-- end + +local pattern = P(" ")^0 * P(-1) + +function string.is_empty(str) + if str == "" then + return true else - table.tohandle(print,t,...) 
+ return lpegmatch(pattern,str) and true or false end end --- -- -- obsolete but we keep them for a while and might comment them later -- -- -- --- roughly: copy-loop : unpack : sub == 0.9 : 0.4 : 0.45 (so in critical apps, use unpack) +-- if not string.escapedpattern then +-- +-- local patterns_escapes = { +-- ["%"] = "%%", +-- ["."] = "%.", +-- ["+"] = "%+", ["-"] = "%-", ["*"] = "%*", +-- ["["] = "%[", ["]"] = "%]", +-- ["("] = "%(", [")"] = "%)", +-- -- ["{"] = "%{", ["}"] = "%}" +-- -- ["^"] = "%^", ["$"] = "%$", +-- } +-- +-- local simple_escapes = { +-- ["-"] = "%-", +-- ["."] = "%.", +-- ["?"] = ".", +-- ["*"] = ".*", +-- } +-- +-- function string.escapedpattern(str,simple) +-- return (gsub(str,".",simple and simple_escapes or patterns_escapes)) +-- end +-- +-- function string.topattern(str,lowercase,strict) +-- if str == "" then +-- return ".*" +-- else +-- str = gsub(str,".",simple_escapes) +-- if lowercase then +-- str = lower(str) +-- end +-- if strict then +-- return "^" .. str .. "$" +-- else +-- return str +-- end +-- end +-- end +-- +-- end -function table.sub(t,i,j) - return { unpack(t,i,j) } -end +--- needs checking --- slower than #t on indexed tables (#t only returns the size of the numerically indexed slice) +local anything = patterns.anything +local allescapes = Cc("%") * S(".-+%?()[]*") -- also {} and ^$ ? +local someescapes = Cc("%") * S(".-+%()[]") -- also {} and ^$ ? 
+local matchescapes = Cc(".") * S("*?") -- wildcard and single match -function table.is_empty(t) - return not t or not next(t) +local pattern_a = Cs ( ( allescapes + anything )^0 ) +local pattern_b = Cs ( ( someescapes + matchescapes + anything )^0 ) +local pattern_c = Cs ( Cc("^") * ( someescapes + matchescapes + anything )^0 * Cc("$") ) + +function string.escapedpattern(str,simple) + return lpegmatch(simple and pattern_b or pattern_a,str) end -function table.has_one_entry(t) - return t and not next(t,next(t)) +function string.topattern(str,lowercase,strict) + if str == "" then + return ".*" + elseif strict then + str = lpegmatch(pattern_c,str) + else + str = lpegmatch(pattern_b,str) + end + if lowercase then + return lower(str) + else + return str + end end --- new +-- print(string.escapedpattern("12+34*.tex",false)) +-- print(string.escapedpattern("12+34*.tex",true)) +-- print(string.topattern ("12+34*.tex",false,false)) +-- print(string.topattern ("12+34*.tex",false,true)) -function table.loweredkeys(t) -- maybe utf - local l = { } - for k, v in next, t do - l[lower(k)] = v - end - return l +function string.valid(str,default) + return (type(str) == "string" and str ~= "" and str) or default or nil end --- new, might move (maybe duplicate) +-- handy fallback -function table.unique(old) - local hash = { } - local new = { } - local n = 0 - for i=1,#old do - local oi = old[i] - if not hash[oi] then - n = n + 1 - new[n] = oi - hash[oi] = true - end - end - return new +string.itself = function(s) return s end + +-- also handy (see utf variant) + +local pattern = Ct(C(1)^0) -- string and not utf ! + +function string.totable(str) + return lpegmatch(pattern,str) end -function table.sorted(t,...) - sort(t,...) - return t -- still sorts in-place +-- handy from within tex: + +local replacer = lpeg.replacer("@","%%") -- Watch the escaped % in lpeg! + +function string.tformat(fmt,...) + return format(lpegmatch(replacer,fmt),...) 
end +-- obsolete names: + +string.quote = string.quoted +string.unquote = string.unquoted end -- closure do -- begin closure to overcome local limits and interference -if not modules then modules = { } end modules ['l-lpeg'] = { +if not modules then modules = { } end modules ['l-table'] = { version = 1.001, comment = "companion to luat-lib.mkiv", author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", @@ -1180,874 +1017,1022 @@ if not modules then modules = { } end modules ['l-lpeg'] = { license = "see context related readme files" } +local type, next, tostring, tonumber, ipairs, select = type, next, tostring, tonumber, ipairs, select +local table, string = table, string +local concat, sort, insert, remove = table.concat, table.sort, table.insert, table.remove +local format, lower, dump = string.format, string.lower, string.dump +local getmetatable, setmetatable = getmetatable, setmetatable +local getinfo = debug.getinfo +local lpegmatch, patterns = lpeg.match, lpeg.patterns +local floor = math.floor --- a new lpeg fails on a #(1-P(":")) test and really needs a + P(-1) - -local lpeg = require("lpeg") +-- extra functions, some might go (when not used) --- tracing (only used when we encounter a problem in integration of lpeg in luatex) +local stripper = patterns.stripper --- some code will move to unicode and string +function table.strip(tab) + local lst, l = { }, 0 + for i=1,#tab do + local s = lpegmatch(stripper,tab[i]) or "" + if s == "" then + -- skip this one + else + l = l + 1 + lst[l] = s + end + end + return lst +end -local report = texio and texio.write_nl or print +function table.keys(t) + if t then + local keys, k = { }, 0 + for key, _ in next, t do + k = k + 1 + keys[k] = key + end + return keys + else + return { } + end +end --- local lpmatch = lpeg.match --- local lpprint = lpeg.print --- local lpp = lpeg.P --- local lpr = lpeg.R --- local lps = lpeg.S --- local lpc = lpeg.C --- local lpb = lpeg.B --- local lpv = lpeg.V --- local lpcf = lpeg.Cf --- local lpcb = 
lpeg.Cb --- local lpcg = lpeg.Cg --- local lpct = lpeg.Ct --- local lpcs = lpeg.Cs --- local lpcc = lpeg.Cc --- local lpcmt = lpeg.Cmt --- local lpcarg = lpeg.Carg +local function compare(a,b) + local ta, tb = type(a), type(b) -- needed, else 11 < 2 + if ta == tb then + return a < b + else + return tostring(a) < tostring(b) + end +end --- function lpeg.match(l,...) report("LPEG MATCH") lpprint(l) return lpmatch(l,...) end +local function sortedkeys(tab) + if tab then + local srt, category, s = { }, 0, 0 -- 0=unknown 1=string, 2=number 3=mixed + for key,_ in next, tab do + s = s + 1 + srt[s] = key + if category == 3 then + -- no further check + else + local tkey = type(key) + if tkey == "string" then + category = (category == 2 and 3) or 1 + elseif tkey == "number" then + category = (category == 1 and 3) or 2 + else + category = 3 + end + end + end + if category == 0 or category == 3 then + sort(srt,compare) + else + sort(srt) + end + return srt + else + return { } + end +end --- function lpeg.P (l) local p = lpp (l) report("LPEG P =") lpprint(l) return p end --- function lpeg.R (l) local p = lpr (l) report("LPEG R =") lpprint(l) return p end --- function lpeg.S (l) local p = lps (l) report("LPEG S =") lpprint(l) return p end --- function lpeg.C (l) local p = lpc (l) report("LPEG C =") lpprint(l) return p end --- function lpeg.B (l) local p = lpb (l) report("LPEG B =") lpprint(l) return p end --- function lpeg.V (l) local p = lpv (l) report("LPEG V =") lpprint(l) return p end --- function lpeg.Cf (l) local p = lpcf (l) report("LPEG Cf =") lpprint(l) return p end --- function lpeg.Cb (l) local p = lpcb (l) report("LPEG Cb =") lpprint(l) return p end --- function lpeg.Cg (l) local p = lpcg (l) report("LPEG Cg =") lpprint(l) return p end --- function lpeg.Ct (l) local p = lpct (l) report("LPEG Ct =") lpprint(l) return p end --- function lpeg.Cs (l) local p = lpcs (l) report("LPEG Cs =") lpprint(l) return p end --- function lpeg.Cc (l) local p = lpcc (l) report("LPEG Cc 
=") lpprint(l) return p end --- function lpeg.Cmt (l) local p = lpcmt (l) report("LPEG Cmt =") lpprint(l) return p end --- function lpeg.Carg (l) local p = lpcarg(l) report("LPEG Carg =") lpprint(l) return p end +local function sortedhashkeys(tab) -- fast one + if tab then + local srt, s = { }, 0 + for key,_ in next, tab do + if key then + s= s + 1 + srt[s] = key + end + end + sort(srt) + return srt + else + return { } + end +end -local type, next = type, next -local byte, char, gmatch, format = string.byte, string.char, string.gmatch, string.format +function table.allkeys(t) + local keys = { } + for i=1,#t do + for k, v in next, t[i] do + keys[k] = true + end + end + return sortedkeys(keys) +end --- Beware, we predefine a bunch of patterns here and one reason for doing so --- is that we get consistent behaviour in some of the visualizers. +table.sortedkeys = sortedkeys +table.sortedhashkeys = sortedhashkeys -lpeg.patterns = lpeg.patterns or { } -- so that we can share -local patterns = lpeg.patterns +local function nothing() end -local P, R, S, V, Ct, C, Cs, Cc, Cp = lpeg.P, lpeg.R, lpeg.S, lpeg.V, lpeg.Ct, lpeg.C, lpeg.Cs, lpeg.Cc, lpeg.Cp -local lpegtype, lpegmatch = lpeg.type, lpeg.match +local function sortedhash(t) + if t then + local n, s = 0, sortedkeys(t) -- the robust one + local function kv(s) + n = n + 1 + local k = s[n] + return k, t[k] + end + return kv, s + else + return nothing + end +end -local utfcharacters = string.utfcharacters -local utfgmatch = unicode and unicode.utf8.gmatch +table.sortedhash = sortedhash +table.sortedpairs = sortedhash -- obsolete -local anything = P(1) -local endofstring = P(-1) -local alwaysmatched = P(true) +function table.append(t,list) + local n = #t + for i=1,#list do + n = n + 1 + t[n] = list[i] + end + return t +end -patterns.anything = anything -patterns.endofstring = endofstring -patterns.beginofstring = alwaysmatched -patterns.alwaysmatched = alwaysmatched +function table.prepend(t, list) + local nl = #list + 
local nt = nl + #t + for i=#t,1,-1 do + t[nt] = t[i] + nt = nt - 1 + end + for i=1,#list do + t[i] = list[i] + end + return t +end -local digit, sign = R('09'), S('+-') -local cr, lf, crlf = P("\r"), P("\n"), P("\r\n") -local newline = crlf + S("\r\n") -- cr + lf -local escaped = P("\\") * anything -local squote = P("'") -local dquote = P('"') -local space = P(" ") +-- function table.merge(t, ...) -- first one is target +-- t = t or { } +-- local lst = { ... } +-- for i=1,#lst do +-- for k, v in next, lst[i] do +-- t[k] = v +-- end +-- end +-- return t +-- end -local utfbom_32_be = P('\000\000\254\255') -local utfbom_32_le = P('\255\254\000\000') -local utfbom_16_be = P('\255\254') -local utfbom_16_le = P('\254\255') -local utfbom_8 = P('\239\187\191') -local utfbom = utfbom_32_be + utfbom_32_le - + utfbom_16_be + utfbom_16_le - + utfbom_8 -local utftype = utfbom_32_be / "utf-32-be" + utfbom_32_le / "utf-32-le" - + utfbom_16_be / "utf-16-be" + utfbom_16_le / "utf-16-le" - + utfbom_8 / "utf-8" + alwaysmatched / "unknown" +function table.merge(t, ...) -- first one is target + t = t or { } + for i=1,select("#",...) do + for k, v in next, (select(i,...)) do + t[k] = v + end + end + return t +end -local utf8next = R("\128\191") +-- function table.merged(...) +-- local tmp, lst = { }, { ... } +-- for i=1,#lst do +-- for k, v in next, lst[i] do +-- tmp[k] = v +-- end +-- end +-- return tmp +-- end -patterns.utf8one = R("\000\127") -patterns.utf8two = R("\194\223") * utf8next -patterns.utf8three = R("\224\239") * utf8next * utf8next -patterns.utf8four = R("\240\244") * utf8next * utf8next * utf8next -patterns.utfbom = utfbom -patterns.utftype = utftype +function table.merged(...) + local t = { } + for i=1,select("#",...) 
do + for k, v in next, (select(i,...)) do + t[k] = v + end + end + return t +end -local utf8char = patterns.utf8one + patterns.utf8two + patterns.utf8three + patterns.utf8four -local validutf8char = utf8char^0 * endofstring * Cc(true) + Cc(false) +-- function table.imerge(t, ...) +-- local lst, nt = { ... }, #t +-- for i=1,#lst do +-- local nst = lst[i] +-- for j=1,#nst do +-- nt = nt + 1 +-- t[nt] = nst[j] +-- end +-- end +-- return t +-- end -patterns.utf8 = utf8char -patterns.utf8char = utf8char -patterns.validutf8 = validutf8char -patterns.validutf8char = validutf8char +function table.imerge(t, ...) + local nt = #t + for i=1,select("#",...) do + local nst = select(i,...) + for j=1,#nst do + nt = nt + 1 + t[nt] = nst[j] + end + end + return t +end -local eol = S("\n\r") -local spacer = S(" \t\f\v") -- + char(0xc2, 0xa0) if we want utf (cf mail roberto) -local whitespace = eol + spacer +-- function table.imerged(...) +-- local tmp, ntmp, lst = { }, 0, {...} +-- for i=1,#lst do +-- local nst = lst[i] +-- for j=1,#nst do +-- ntmp = ntmp + 1 +-- tmp[ntmp] = nst[j] +-- end +-- end +-- return tmp +-- end -patterns.digit = digit -patterns.sign = sign -patterns.cardinal = sign^0 * digit^1 -patterns.integer = sign^0 * digit^1 -patterns.unsigned = digit^0 * P('.') * digit^1 -patterns.float = sign^0 * patterns.unsigned -patterns.cunsigned = digit^0 * P(',') * digit^1 -patterns.cfloat = sign^0 * patterns.cunsigned -patterns.number = patterns.float + patterns.integer -patterns.cnumber = patterns.cfloat + patterns.integer -patterns.oct = P("0") * R("07")^1 -patterns.octal = patterns.oct -patterns.HEX = P("0x") * R("09","AF")^1 -patterns.hex = P("0x") * R("09","af")^1 -patterns.hexadecimal = P("0x") * R("09","AF","af")^1 -patterns.lowercase = R("az") -patterns.uppercase = R("AZ") -patterns.letter = patterns.lowercase + patterns.uppercase -patterns.space = space -patterns.tab = P("\t") -patterns.spaceortab = patterns.space + patterns.tab -patterns.eol = eol -patterns.spacer = 
spacer -patterns.whitespace = whitespace -patterns.newline = newline -patterns.emptyline = newline^1 -patterns.nonspacer = 1 - spacer -patterns.nonwhitespace = 1 - whitespace -patterns.equal = P("=") -patterns.comma = P(",") -patterns.commaspacer = P(",") * spacer^0 -patterns.period = P(".") -patterns.colon = P(":") -patterns.semicolon = P(";") -patterns.underscore = P("_") -patterns.escaped = escaped -patterns.squote = squote -patterns.dquote = dquote -patterns.nosquote = (escaped + (1-squote))^0 -patterns.nodquote = (escaped + (1-dquote))^0 -patterns.unsingle = (squote/"") * patterns.nosquote * (squote/"") -patterns.undouble = (dquote/"") * patterns.nodquote * (dquote/"") -patterns.unquoted = patterns.undouble + patterns.unsingle -- more often undouble -patterns.unspacer = ((patterns.spacer^1)/"")^0 +function table.imerged(...) + local tmp, ntmp = { }, 0 + for i=1,select("#",...) do + local nst = select(i,...) + for j=1,#nst do + ntmp = ntmp + 1 + tmp[ntmp] = nst[j] + end + end + return tmp +end -patterns.singlequoted = squote * patterns.nosquote * squote -patterns.doublequoted = dquote * patterns.nodquote * dquote -patterns.quoted = patterns.doublequoted + patterns.singlequoted - -patterns.somecontent = (anything - newline - space)^1 -- (utf8char - newline - space)^1 -patterns.beginline = #(1-newline) +local function fastcopy(old,metatabletoo) -- fast one + if old then + local new = { } + for k, v in next, old do + if type(v) == "table" then + new[k] = fastcopy(v,metatabletoo) -- was just table.copy + else + new[k] = v + end + end + if metatabletoo then + -- optional second arg + local mt = getmetatable(old) + if mt then + setmetatable(new,mt) + end + end + return new + else + return { } + end +end --- print(string.unquoted("test")) --- print(string.unquoted([["t\"est"]])) --- print(string.unquoted([["t\"est"x]])) --- print(string.unquoted("\'test\'")) --- print(string.unquoted('"test"')) --- print(string.unquoted('"test"')) +-- todo : copy without metatable 
-local function anywhere(pattern) --slightly adapted from website - return P { P(pattern) + 1 * V(1) } +local function copy(t, tables) -- taken from lua wiki, slightly adapted + tables = tables or { } + local tcopy = {} + if not tables[t] then + tables[t] = tcopy + end + for i,v in next, t do -- brrr, what happens with sparse indexed + if type(i) == "table" then + if tables[i] then + i = tables[i] + else + i = copy(i, tables) + end + end + if type(v) ~= "table" then + tcopy[i] = v + elseif tables[v] then + tcopy[i] = tables[v] + else + tcopy[i] = copy(v, tables) + end + end + local mt = getmetatable(t) + if mt then + setmetatable(tcopy,mt) + end + return tcopy end -lpeg.anywhere = anywhere +table.fastcopy = fastcopy +table.copy = copy -function lpeg.instringchecker(p) - p = anywhere(p) - return function(str) - return lpegmatch(p,str) and true or false +function table.derive(parent) -- for the moment not public + local child = { } + if parent then + setmetatable(child,{ __index = parent }) end + return child end -function lpeg.splitter(pattern, action) - return (((1-P(pattern))^1)/action+1)^0 +function table.tohash(t,value) + local h = { } + if t then + if value == nil then value = true end + for _, v in next, t do -- no ipairs here + h[v] = value + end + end + return h end -function lpeg.tsplitter(pattern, action) - return Ct((((1-P(pattern))^1)/action+1)^0) +function table.fromhash(t) + local hsh, h = { }, 0 + for k, v in next, t do -- no ipairs here + if v then + h = h + 1 + hsh[h] = k + end + end + return hsh end --- probleem: separator can be lpeg and that does not hash too well, but --- it's quite okay as the key is then not garbage collected +local noquotes, hexify, handle, reduce, compact, inline, functions -local splitters_s, splitters_m, splitters_t = { }, { }, { } +local reserved = table.tohash { -- intercept a language inconvenience: no reserved words as key + 'and', 'break', 'do', 'else', 'elseif', 'end', 'false', 'for', 'function', 'if', + 'in', 
'local', 'nil', 'not', 'or', 'repeat', 'return', 'then', 'true', 'until', 'while', +} -local function splitat(separator,single) - local splitter = (single and splitters_s[separator]) or splitters_m[separator] - if not splitter then - separator = P(separator) - local other = C((1 - separator)^0) - if single then - local any = anything - splitter = other * (separator * C(any^0) + "") -- ? - splitters_s[separator] = splitter - else - splitter = other * (separator * other)^0 - splitters_m[separator] = splitter +local function simple_table(t) + if #t > 0 then + local n = 0 + for _,v in next, t do + n = n + 1 + end + if n == #t then + local tt, nt = { }, 0 + for i=1,#t do + local v = t[i] + local tv = type(v) + if tv == "number" then + nt = nt + 1 + if hexify then + tt[nt] = format("0x%04X",v) + else + tt[nt] = tostring(v) -- tostring not needed + end + elseif tv == "boolean" then + nt = nt + 1 + tt[nt] = tostring(v) + elseif tv == "string" then + nt = nt + 1 + tt[nt] = format("%q",v) + else + tt = nil + break + end + end + return tt end end - return splitter + return nil end -local function tsplitat(separator) - local splitter = splitters_t[separator] - if not splitter then - splitter = Ct(splitat(separator)) - splitters_t[separator] = splitter - end - return splitter -end +-- Because this is a core function of mkiv I moved some function calls +-- inline. +-- +-- twice as fast in a test: +-- +-- local propername = lpeg.P(lpeg.R("AZ","az","__") * lpeg.R("09","AZ","az", "__")^0 * lpeg.P(-1) ) -lpeg.splitat = splitat -lpeg.tsplitat = tsplitat +-- problem: there no good number_to_string converter with the best resolution -function string.splitup(str,separator) - if not separator then - separator = "," - end - return lpegmatch(splitters_m[separator] or splitat(separator),str) -end +-- probably using .. 
is faster than format +-- maybe split in a few cases (yes/no hexify) ---~ local p = splitat("->",false) print(lpegmatch(p,"oeps->what->more")) -- oeps what more ---~ local p = splitat("->",true) print(lpegmatch(p,"oeps->what->more")) -- oeps what->more ---~ local p = splitat("->",false) print(lpegmatch(p,"oeps")) -- oeps ---~ local p = splitat("->",true) print(lpegmatch(p,"oeps")) -- oeps +-- todo: %g faster on numbers than %s -local cache = { } +local propername = patterns.propername -- was find(name,"^%a[%w%_]*$") -function lpeg.split(separator,str) - local c = cache[separator] - if not c then - c = tsplitat(separator) - cache[separator] = c - end - return lpegmatch(c,str) -end +local function dummy() end -function string.split(str,separator) - if separator then - local c = cache[separator] - if not c then - c = tsplitat(separator) - cache[separator] = c +local function do_serialize(root,name,depth,level,indexed) + if level > 0 then + depth = depth .. " " + if indexed then + handle(format("%s{",depth)) + else + local tn = type(name) + if tn == "number" then + if hexify then + handle(format("%s[0x%04X]={",depth,name)) + else + handle(format("%s[%s]={",depth,name)) + end + elseif tn == "string" then + if noquotes and not reserved[name] and lpegmatch(propername,name) then + handle(format("%s%s={",depth,name)) + else + handle(format("%s[%q]={",depth,name)) + end + elseif tn == "boolean" then + handle(format("%s[%s]={",depth,tostring(name))) + else + handle(format("%s{",depth)) + end + end + end + -- we could check for k (index) being number (cardinal) + if root and next(root) then + -- local first, last = nil, 0 -- #root cannot be trusted here (will be ok in 5.2 when ipairs is gone) + -- if compact then + -- -- NOT: for k=1,#root do (we need to quit at nil) + -- for k,v in ipairs(root) do -- can we use next? 
+ -- if not first then first = k end + -- last = last + 1 + -- end + -- end + local first, last = nil, 0 + if compact then + last = #root + for k=1,last do + if root[k] == nil then + last = k - 1 + break + end + end + if last > 0 then + first = 1 + end + end + local sk = sortedkeys(root) + for i=1,#sk do + local k = sk[i] + local v = root[k] + --~ if v == root then + -- circular + --~ else + local t, tk = type(v), type(k) + if compact and first and tk == "number" and k >= first and k <= last then + if t == "number" then + if hexify then + handle(format("%s 0x%04X,",depth,v)) + else + handle(format("%s %s,",depth,v)) -- %.99g + end + elseif t == "string" then + if reduce and tonumber(v) then + handle(format("%s %s,",depth,v)) + else + handle(format("%s %q,",depth,v)) + end + elseif t == "table" then + if not next(v) then + handle(format("%s {},",depth)) + elseif inline then -- and #t > 0 + local st = simple_table(v) + if st then + handle(format("%s { %s },",depth,concat(st,", "))) + else + do_serialize(v,k,depth,level+1,true) + end + else + do_serialize(v,k,depth,level+1,true) + end + elseif t == "boolean" then + handle(format("%s %s,",depth,tostring(v))) + elseif t == "function" then + if functions then + handle(format('%s load(%q),',depth,dump(v))) + else + handle(format('%s "function",',depth)) + end + else + handle(format("%s %q,",depth,tostring(v))) + end + elseif k == "__p__" then -- parent + if false then + handle(format("%s __p__=nil,",depth)) + end + elseif t == "number" then + if tk == "number" then + if hexify then + handle(format("%s [0x%04X]=0x%04X,",depth,k,v)) + else + handle(format("%s [%s]=%s,",depth,k,v)) -- %.99g + end + elseif tk == "boolean" then + if hexify then + handle(format("%s [%s]=0x%04X,",depth,tostring(k),v)) + else + handle(format("%s [%s]=%s,",depth,tostring(k),v)) -- %.99g + end + elseif noquotes and not reserved[k] and lpegmatch(propername,k) then + if hexify then + handle(format("%s %s=0x%04X,",depth,k,v)) + else + 
handle(format("%s %s=%s,",depth,k,v)) -- %.99g + end + else + if hexify then + handle(format("%s [%q]=0x%04X,",depth,k,v)) + else + handle(format("%s [%q]=%s,",depth,k,v)) -- %.99g + end + end + elseif t == "string" then + if reduce and tonumber(v) then + if tk == "number" then + if hexify then + handle(format("%s [0x%04X]=%s,",depth,k,v)) + else + handle(format("%s [%s]=%s,",depth,k,v)) + end + elseif tk == "boolean" then + handle(format("%s [%s]=%s,",depth,tostring(k),v)) + elseif noquotes and not reserved[k] and lpegmatch(propername,k) then + handle(format("%s %s=%s,",depth,k,v)) + else + handle(format("%s [%q]=%s,",depth,k,v)) + end + else + if tk == "number" then + if hexify then + handle(format("%s [0x%04X]=%q,",depth,k,v)) + else + handle(format("%s [%s]=%q,",depth,k,v)) + end + elseif tk == "boolean" then + handle(format("%s [%s]=%q,",depth,tostring(k),v)) + elseif noquotes and not reserved[k] and lpegmatch(propername,k) then + handle(format("%s %s=%q,",depth,k,v)) + else + handle(format("%s [%q]=%q,",depth,k,v)) + end + end + elseif t == "table" then + if not next(v) then + if tk == "number" then + if hexify then + handle(format("%s [0x%04X]={},",depth,k)) + else + handle(format("%s [%s]={},",depth,k)) + end + elseif tk == "boolean" then + handle(format("%s [%s]={},",depth,tostring(k))) + elseif noquotes and not reserved[k] and lpegmatch(propername,k) then + handle(format("%s %s={},",depth,k)) + else + handle(format("%s [%q]={},",depth,k)) + end + elseif inline then + local st = simple_table(v) + if st then + if tk == "number" then + if hexify then + handle(format("%s [0x%04X]={ %s },",depth,k,concat(st,", "))) + else + handle(format("%s [%s]={ %s },",depth,k,concat(st,", "))) + end + elseif tk == "boolean" then + handle(format("%s [%s]={ %s },",depth,tostring(k),concat(st,", "))) + elseif noquotes and not reserved[k] and lpegmatch(propername,k) then + handle(format("%s %s={ %s },",depth,k,concat(st,", "))) + else + handle(format("%s [%q]={ %s 
},",depth,k,concat(st,", "))) + end + else + do_serialize(v,k,depth,level+1) + end + else + do_serialize(v,k,depth,level+1) + end + elseif t == "boolean" then + if tk == "number" then + if hexify then + handle(format("%s [0x%04X]=%s,",depth,k,tostring(v))) + else + handle(format("%s [%s]=%s,",depth,k,tostring(v))) + end + elseif tk == "boolean" then + handle(format("%s [%s]=%s,",depth,tostring(k),tostring(v))) + elseif noquotes and not reserved[k] and lpegmatch(propername,k) then + handle(format("%s %s=%s,",depth,k,tostring(v))) + else + handle(format("%s [%q]=%s,",depth,k,tostring(v))) + end + elseif t == "function" then + if functions then + local f = getinfo(v).what == "C" and dump(dummy) or dump(v) + -- local f = getinfo(v).what == "C" and dump(function(...) return v(...) end) or dump(v) + if tk == "number" then + if hexify then + handle(format("%s [0x%04X]=load(%q),",depth,k,f)) + else + handle(format("%s [%s]=load(%q),",depth,k,f)) + end + elseif tk == "boolean" then + handle(format("%s [%s]=load(%q),",depth,tostring(k),f)) + elseif noquotes and not reserved[k] and lpegmatch(propername,k) then + handle(format("%s %s=load(%q),",depth,k,f)) + else + handle(format("%s [%q]=load(%q),",depth,k,f)) + end + end + else + if tk == "number" then + if hexify then + handle(format("%s [0x%04X]=%q,",depth,k,tostring(v))) + else + handle(format("%s [%s]=%q,",depth,k,tostring(v))) + end + elseif tk == "boolean" then + handle(format("%s [%s]=%q,",depth,tostring(k),tostring(v))) + elseif noquotes and not reserved[k] and lpegmatch(propername,k) then + handle(format("%s %s=%q,",depth,k,tostring(v))) + else + handle(format("%s [%q]=%q,",depth,k,tostring(v))) + end + end + --~ end end - return lpegmatch(c,str) - else - return { str } - end -end - -local spacing = patterns.spacer^0 * newline -- sort of strip -local empty = spacing * Cc("") -local nonempty = Cs((1-spacing)^1) * spacing^-1 -local content = (empty + nonempty)^1 - -patterns.textline = content - ---~ local linesplitter 
= Ct(content^0) ---~ ---~ function string.splitlines(str) ---~ return lpegmatch(linesplitter,str) ---~ end - -local linesplitter = tsplitat(newline) - -patterns.linesplitter = linesplitter - -function string.splitlines(str) - return lpegmatch(linesplitter,str) -end - -local utflinesplitter = utfbom^-1 * tsplitat(newline) - -patterns.utflinesplitter = utflinesplitter - -function string.utfsplitlines(str) - return lpegmatch(utflinesplitter,str or "") -end - -local utfcharsplitter_ows = utfbom^-1 * Ct(C(utf8char)^0) -local utfcharsplitter_iws = utfbom^-1 * Ct((whitespace^1 + C(utf8char))^0) - -function string.utfsplit(str,ignorewhitespace) -- new - if ignorewhitespace then - return lpegmatch(utfcharsplitter_iws,str or "") - else - return lpegmatch(utfcharsplitter_ows,str or "") - end -end - --- inspect(string.utfsplit("a b c d")) --- inspect(string.utfsplit("a b c d",true)) - --- -- alternative 1: 0.77 --- --- local utfcharcounter = utfbom^-1 * Cs((utf8char/'!')^0) --- --- function string.utflength(str) --- return #lpegmatch(utfcharcounter,str or "") --- end --- --- -- alternative 2: 1.70 --- --- local n = 0 --- --- local utfcharcounter = utfbom^-1 * (utf8char/function() n = n + 1 end)^0 -- slow --- --- function string.utflength(str) --- n = 0 --- lpegmatch(utfcharcounter,str or "") --- return n --- end --- --- -- alternative 3: 0.24 (native unicode.utf8.len: 0.047) - -local n = 0 - -local utfcharcounter = utfbom^-1 * Cs ( ( - Cp() * (lpeg.patterns.utf8one )^1 * Cp() / function(f,t) n = n + t - f end - + Cp() * (lpeg.patterns.utf8two )^1 * Cp() / function(f,t) n = n + (t - f)/2 end - + Cp() * (lpeg.patterns.utf8three)^1 * Cp() / function(f,t) n = n + (t - f)/3 end - + Cp() * (lpeg.patterns.utf8four )^1 * Cp() / function(f,t) n = n + (t - f)/4 end -)^0 ) - -function string.utflength(str) - n = 0 - lpegmatch(utfcharcounter,str or "") - return n -end - ---~ lpeg.splitters = cache -- no longer public - -local cache = { } - -function lpeg.checkedsplit(separator,str) - 
local c = cache[separator] - if not c then - separator = P(separator) - local other = C((1 - separator)^1) - c = Ct(separator^0 * other * (separator^1 * other)^0) - cache[separator] = c end - return lpegmatch(c,str) -end - -function string.checkedsplit(str,separator) - local c = cache[separator] - if not c then - separator = P(separator) - local other = C((1 - separator)^1) - c = Ct(separator^0 * other * (separator^1 * other)^0) - cache[separator] = c - end - return lpegmatch(c,str) -end - ---~ from roberto's site: - -local function f2(s) local c1, c2 = byte(s,1,2) return c1 * 64 + c2 - 12416 end -local function f3(s) local c1, c2, c3 = byte(s,1,3) return (c1 * 64 + c2) * 64 + c3 - 925824 end -local function f4(s) local c1, c2, c3, c4 = byte(s,1,4) return ((c1 * 64 + c2) * 64 + c3) * 64 + c4 - 63447168 end - -local utf8byte = patterns.utf8one/byte + patterns.utf8two/f2 + patterns.utf8three/f3 + patterns.utf8four/f4 - -patterns.utf8byte = utf8byte - ---~ local str = " a b c d " - ---~ local s = lpeg.stripper(lpeg.R("az")) print("["..lpegmatch(s,str).."]") ---~ local s = lpeg.keeper(lpeg.R("az")) print("["..lpegmatch(s,str).."]") ---~ local s = lpeg.stripper("ab") print("["..lpegmatch(s,str).."]") ---~ local s = lpeg.keeper("ab") print("["..lpegmatch(s,str).."]") - -local cache = { } - -function lpeg.stripper(str) - if type(str) == "string" then - local s = cache[str] - if not s then - s = Cs(((S(str)^1)/"" + 1)^0) - cache[str] = s - end - return s - else - return Cs(((str^1)/"" + 1)^0) + if level > 0 then + handle(format("%s},",depth)) end end -local cache = { } +-- replacing handle by a direct t[#t+1] = ... 
(plus test) is not much +-- faster (0.03 on 1.00 for zapfino.tma) -function lpeg.keeper(str) - if type(str) == "string" then - local s = cache[str] - if not s then - s = Cs((((1-S(str))^1)/"" + 1)^0) - cache[str] = s +local function serialize(_handle,root,name,specification) -- handle wins + local tname = type(name) + if type(specification) == "table" then + noquotes = specification.noquotes + hexify = specification.hexify + handle = _handle or specification.handle or print + reduce = specification.reduce or false + functions = specification.functions + compact = specification.compact + inline = specification.inline and compact + if functions == nil then + functions = true end - return s - else - return Cs((((1-str)^1)/"" + 1)^0) - end -end - -function lpeg.frontstripper(str) -- or pattern (yet undocumented) - return (P(str) + P(true)) * Cs(anything^0) -end - -function lpeg.endstripper(str) -- or pattern (yet undocumented) - return Cs((1 - P(str) * endofstring)^0) -end - --- Just for fun I looked at the used bytecode and --- p = (p and p + pp) or pp gets one more (testset). 
- -function lpeg.replacer(one,two,makefunction) - local pattern - if type(one) == "table" then - local no = #one - local p = P(false) - if no == 0 then - for k, v in next, one do - p = p + P(k) / v - end - pattern = Cs((p + 1)^0) - elseif no == 1 then - local o = one[1] - one, two = P(o[1]), o[2] - -- pattern = Cs(((1-one)^1 + one/two)^0) - pattern = Cs((one/two + 1)^0) - else - for i=1,no do - local o = one[i] - p = p + P(o[1]) / o[2] - end - pattern = Cs((p + 1)^0) + if compact == nil then + compact = true end - else - one = P(one) - two = two or "" - -- pattern = Cs(((1-one)^1 + one/two)^0) - pattern = Cs((one/two +1)^0) - end - if makefunction then - return function(str) - return lpegmatch(pattern,str) + if inline == nil then + inline = compact end else - return pattern + noquotes = false + hexify = false + handle = _handle or print + reduce = false + compact = true + inline = true + functions = true end -end - -function lpeg.finder(lst,makefunction) - local pattern - if type(lst) == "table" then - local p = P(false) - for i=1,#lst do - p = p + P(lst[i]) + if tname == "string" then + if name == "return" then + handle("return {") + else + handle(name .. "={") end - pattern = (p + 1)^0 - else - pattern = (P(lst) + 1)^0 - end - if makefunction then - return function(str) - return lpegmatch(pattern,str) + elseif tname == "number" then + if hexify then + handle(format("[0x%04X]={",name)) + else + handle("[" .. name .. 
"]={") end - else - return pattern - end -end - --- print(lpeg.match(lpeg.replacer("e","a"),"test test")) --- print(lpeg.match(lpeg.replacer{{"e","a"}},"test test")) --- print(lpeg.match(lpeg.replacer({ e = "a", t = "x" }),"test test")) - -local splitters_f, splitters_s = { }, { } - -function lpeg.firstofsplit(separator) -- always return value - local splitter = splitters_f[separator] - if not splitter then - separator = P(separator) - splitter = C((1 - separator)^0) - splitters_f[separator] = splitter - end - return splitter -end - -function lpeg.secondofsplit(separator) -- nil if not split - local splitter = splitters_s[separator] - if not splitter then - separator = P(separator) - splitter = (1 - separator)^0 * separator * C(anything^0) - splitters_s[separator] = splitter - end - return splitter -end - -function lpeg.balancer(left,right) - left, right = P(left), P(right) - return P { left * ((1 - left - right) + V(1))^0 * right } -end - ---~ print(1,lpegmatch(lpeg.firstofsplit(":"),"bc:de")) ---~ print(2,lpegmatch(lpeg.firstofsplit(":"),":de")) -- empty ---~ print(3,lpegmatch(lpeg.firstofsplit(":"),"bc")) ---~ print(4,lpegmatch(lpeg.secondofsplit(":"),"bc:de")) ---~ print(5,lpegmatch(lpeg.secondofsplit(":"),"bc:")) -- empty ---~ print(6,lpegmatch(lpeg.secondofsplit(":",""),"bc")) ---~ print(7,lpegmatch(lpeg.secondofsplit(":"),"bc")) ---~ print(9,lpegmatch(lpeg.secondofsplit(":","123"),"bc")) - ---~ -- slower: ---~ ---~ function lpeg.counter(pattern) ---~ local n, pattern = 0, (lpeg.P(pattern)/function() n = n + 1 end + lpeg.anything)^0 ---~ return function(str) n = 0 ; lpegmatch(pattern,str) ; return n end ---~ end - -local nany = utf8char/"" - -function lpeg.counter(pattern) - pattern = Cs((P(pattern)/" " + nany)^0) - return function(str) - return #lpegmatch(pattern,str) - end -end - -if utfgmatch then - - function lpeg.count(str,what) -- replaces string.count - if type(what) == "string" then - local n = 0 - for _ in utfgmatch(str,what) do - n = n + 1 - end - 
return n - else -- 4 times slower but still faster than / function - return #lpegmatch(Cs((P(what)/" " + nany)^0),str) + elseif tname == "boolean" then + if name then + handle("return {") + else + handle("{") end + else + handle("t={") end - -else - - local cache = { } - - function lpeg.count(str,what) -- replaces string.count - if type(what) == "string" then - local p = cache[what] - if not p then - p = Cs((P(what)/" " + nany)^0) - cache[p] = p - end - return #lpegmatch(p,str) - else -- 4 times slower but still faster than / function - return #lpegmatch(Cs((P(what)/" " + nany)^0),str) + if root then + -- The dummy access will initialize a table that has a delayed initialization + -- using a metatable. (maybe explicitly test for metatable) + if getmetatable(root) then -- todo: make this an option, maybe even per subtable + local dummy = root._w_h_a_t_e_v_e_r_ + root._w_h_a_t_e_v_e_r_ = nil + end + -- Let's forget about empty tables. + if next(root) then + do_serialize(root,name,"",0) end end - + handle("}") end -local patterns_escapes = { -- also defines in l-string - ["%"] = "%%", - ["."] = "%.", - ["+"] = "%+", ["-"] = "%-", ["*"] = "%*", - ["["] = "%[", ["]"] = "%]", - ["("] = "%)", [")"] = "%)", - -- ["{"] = "%{", ["}"] = "%}" - -- ["^"] = "%^", ["$"] = "%$", -} - -local simple_escapes = { -- also defines in l-string - ["-"] = "%-", - ["."] = "%.", - ["?"] = ".", - ["*"] = ".*", -} - -local p = Cs((S("-.+*%()[]") / patterns_escapes + anything)^0) -local s = Cs((S("-.+*%()[]") / simple_escapes + anything)^0) +-- name: +-- +-- true : return { } +-- false : { } +-- nil : t = { } +-- string : string = { } +-- "return" : return { } +-- number : [number] = { } -function string.escapedpattern(str,simple) - return lpegmatch(simple and s or p,str) +function table.serialize(root,name,specification) + local t, n = { }, 0 + local function flush(s) + n = n + 1 + t[n] = s + end + serialize(flush,root,name,specification) + return concat(t,"\n") end --- utf extensies 
+table.tohandle = serialize -lpeg.UP = lpeg.P +-- sometimes tables are real use (zapfino extra pro is some 85M) in which +-- case a stepwise serialization is nice; actually, we could consider: +-- +-- for line in table.serializer(root,name,reduce,noquotes) do +-- ...(line) +-- end +-- +-- so this is on the todo list -if utfcharacters then +local maxtab = 2*1024 - function lpeg.US(str) - local p = P(false) - for uc in utfcharacters(str) do - p = p + P(uc) +function table.tofile(filename,root,name,specification) + local f = io.open(filename,'w') + if f then + if maxtab > 1 then + local t, n = { }, 0 + local function flush(s) + n = n + 1 + t[n] = s + if n > maxtab then + f:write(concat(t,"\n"),"\n") -- hm, write(sometable) should be nice + t, n = { }, 0 -- we could recycle t if needed + end + end + serialize(flush,root,name,specification) + f:write(concat(t,"\n"),"\n") + else + local function flush(s) + f:write(s,"\n") + end + serialize(flush,root,name,specification) end - return p + f:close() + io.flush() end +end - -elseif utfgmatch then - - function lpeg.US(str) - local p = P(false) - for uc in utfgmatch(str,".") do - p = p + P(uc) +local function flattened(t,f,depth) + if f == nil then + f = { } + depth = 0xFFFF + elseif tonumber(f) then + -- assume that only two arguments are given + depth = f + f = { } + elseif not depth then + depth = 0xFFFF + end + for k, v in next, t do + if type(k) ~= "number" then + if depth > 0 and type(v) == "table" then + flattened(v,f,depth-1) + else + f[k] = v + end end - return p end - -else - - function lpeg.US(str) - local p = P(false) - local f = function(uc) - p = p + P(uc) + local n = #f + for k=1,#t do + local v = t[k] + if depth > 0 and type(v) == "table" then + flattened(v,f,depth-1) + n = #f + else + n = n + 1 + f[n] = v end - lpegmatch((utf8char/f)^0,str) - return p end - + return f end -local range = utf8byte * utf8byte + Cc(false) -- utf8byte is already a capture - -local utfchar = unicode and unicode.utf8 and 
unicode.utf8.char +table.flattened = flattened -function lpeg.UR(str,more) - local first, last - if type(str) == "number" then - first = str - last = more or first - else - first, last = lpegmatch(range,str) - if not last then - return P(str) - end +local function unnest(t,f) -- only used in mk, for old times sake + if not f then -- and only relevant for token lists + f = { } -- this one can become obsolete end - if first == last then - return P(str) - elseif utfchar and (last - first < 8) then -- a somewhat arbitrary criterium - local p = P(false) - for i=first,last do - p = p + P(utfchar(i)) - end - return p -- nil when invalid range - else - local f = function(b) - return b >= first and b <= last + for i=1,#t do + local v = t[i] + if type(v) == "table" then + if type(v[1]) == "table" then + unnest(v,f) + else + f[#f+1] = v + end + else + f[#f+1] = v end - -- tricky, these nested captures - return utf8byte / f -- nil when invalid range end + return f end --- print(lpeg.match(lpeg.Cs((C(lpeg.UR("αω"))/{ ["χ"] = "OEPS" })^0),"αωχαω")) - ---~ lpeg.print(lpeg.R("ab","cd","gh")) ---~ lpeg.print(lpeg.P("a","b","c")) ---~ lpeg.print(lpeg.S("a","b","c")) - ---~ print(lpeg.count("äáàa",lpeg.P("á") + lpeg.P("à"))) ---~ print(lpeg.count("äáàa",lpeg.UP("áà"))) ---~ print(lpeg.count("äáàa",lpeg.US("àá"))) ---~ print(lpeg.count("äáàa",lpeg.UR("aá"))) ---~ print(lpeg.count("äáàa",lpeg.UR("àá"))) ---~ print(lpeg.count("äáàa",lpeg.UR(0x0000,0xFFFF))) - -function lpeg.is_lpeg(p) - return p and lpegtype(p) == "pattern" -end - -function lpeg.oneof(list,...) -- lpeg.oneof("elseif","else","if","then") -- assume proper order - if type(list) ~= "table" then - list = { list, ... } - end - -- table.sort(list) -- longest match first - local p = P(list[1]) - for l=2,#list do - p = p + P(list[l]) - end - return p +function table.unnest(t) -- bad name + return unnest(t) end --- For the moment here, but it might move to utilities. 
Beware, we need to --- have the longest keyword first, so 'aaa' comes beforte 'aa' which is why we --- loop back from the end cq. prepend. - -local sort, fastcopy, sortedkeys = table.sort, table.fastcopy, table.sortedkeys -- dependency! - -function lpeg.append(list,pp,delayed,checked) - local p = pp - if #list > 0 then - local keys = fastcopy(list) - sort(keys) - for i=#keys,1,-1 do - local k = keys[i] - if p then - p = P(k) + p - else - p = P(k) - end - end - elseif delayed then -- hm, it looks like the lpeg parser resolves anyway - local keys = sortedkeys(list) - if p then - for i=1,#keys,1 do - local k = keys[i] - local v = list[k] - p = P(k)/list + p - end - else - for i=1,#keys do - local k = keys[i] - local v = list[k] - if p then - p = P(k) + p - else - p = P(k) - end - end - if p then - p = p / list - end - end - elseif checked then - -- problem: substitution gives a capture - local keys = sortedkeys(list) - for i=1,#keys do - local k = keys[i] - local v = list[k] - if p then - if k == v then - p = P(k) + p - else - p = P(k)/v + p - end - else - if k == v then - p = P(k) - else - p = P(k)/v +local function are_equal(a,b,n,m) -- indexed + if a and b and #a == #b then + n = n or 1 + m = m or #a + for i=n,m do + local ai, bi = a[i], b[i] + if ai==bi then + -- same + elseif type(ai) == "table" and type(bi) == "table" then + if not are_equal(ai,bi) then + return false end - end - end - else - local keys = sortedkeys(list) - for i=1,#keys do - local k = keys[i] - local v = list[k] - if p then - p = P(k)/v + p else - p = P(k)/v + return false end end + return true + else + return false end - return p end --- inspect(lpeg.append({ a = "1", aa = "1", aaa = "1" } ,nil,true)) --- inspect(lpeg.append({ ["degree celsius"] = "1", celsius = "1", degree = "1" } ,nil,true)) +local function identical(a,b) -- assumes same structure + for ka, va in next, a do + local vb = b[ka] + if va == vb then + -- same + elseif type(va) == "table" and type(vb) == "table" then + if not 
identical(va,vb) then + return false + end + else + return false + end + end + return true +end --- function lpeg.exact_match(words,case_insensitive) --- local pattern = concat(words) --- if case_insensitive then --- local pattern = S(upper(characters)) + S(lower(characters)) --- local list = { } --- for i=1,#words do --- list[lower(words[i])] = true --- end --- return Cmt(pattern^1, function(_,i,s) --- return list[lower(s)] and i --- end) --- else --- local pattern = S(concat(words)) --- local list = { } --- for i=1,#words do --- list[words[i]] = true --- end --- return Cmt(pattern^1, function(_,i,s) --- return list[s] and i --- end) --- end --- end +table.identical = identical +table.are_equal = are_equal --- experiment: +-- maybe also make a combined one -local function make(t) - local p --- for k, v in next, t do - for k, v in table.sortedhash(t) do - if not p then - if next(v) then - p = P(k) * make(v) - else - p = P(k) +function table.compact(t) -- remove empty tables, assumes subtables + if t then + for k, v in next, t do + if not next(v) then -- no type checking + t[k] = nil end - else - if next(v) then - p = p + P(k) * make(v) - else - p = p + P(k) + end + end +end + +function table.contains(t, v) + if t then + for i=1, #t do + if t[i] == v then + return i end end end - return p + return false end -function lpeg.utfchartabletopattern(list) - local tree = { } - for i=1,#list do - local t = tree - for c in gmatch(list[i],".") do - if not t[c] then - t[c] = { } +function table.count(t) + local n = 0 + for k, v in next, t do + n = n + 1 + end + return n +end + +function table.swapped(t,s) -- hash + local n = { } + if s then + for k, v in next, s do + n[k] = v + end + end + for k, v in next, t do + n[v] = k + end + return n +end + +function table.mirrored(t) -- hash + local n = { } + for k, v in next, t do + n[v] = k + n[k] = v + end + return n +end + +function table.reversed(t) + if t then + local tt, tn = { }, #t + if tn > 0 then + local ttn = 0 + for 
i=tn,1,-1 do + ttn = ttn + 1 + tt[ttn] = t[i] end - t = t[c] end + return tt end - return make(tree) end --- inspect ( lpeg.utfchartabletopattern { --- utfchar(0x00A0), -- nbsp --- utfchar(0x2000), -- enquad --- utfchar(0x2001), -- emquad --- utfchar(0x2002), -- enspace --- utfchar(0x2003), -- emspace --- utfchar(0x2004), -- threeperemspace --- utfchar(0x2005), -- fourperemspace --- utfchar(0x2006), -- sixperemspace --- utfchar(0x2007), -- figurespace --- utfchar(0x2008), -- punctuationspace --- utfchar(0x2009), -- breakablethinspace --- utfchar(0x200A), -- hairspace --- utfchar(0x200B), -- zerowidthspace --- utfchar(0x202F), -- narrownobreakspace --- utfchar(0x205F), -- math thinspace --- } ) +function table.reverse(t) + if t then + local n = #t + for i=1,floor(n/2) do + local j = n - i + 1 + t[i], t[j] = t[j], t[i] + end + return t + end +end --- handy from within tex: +function table.sequenced(t,sep) -- hash only + if t then + local s, n = { }, 0 + for k, v in sortedhash(t) do + if simple then + if v == true then + n = n + 1 + s[n] = k + elseif v and v~= "" then + n = n + 1 + s[n] = k .. "=" .. tostring(v) + end + else + n = n + 1 + s[n] = k .. "=" .. tostring(v) + end + end + return concat(s, sep or " | ") + else + return "" + end +end -local lpegmatch = lpeg.match +function table.print(t,...) + if type(t) ~= "table" then + print(tostring(t)) + else + table.tohandle(print,t,...) + end +end -local replacer = lpeg.replacer("@","%%") -- Watch the escaped % in lpeg! +-- -- -- obsolete but we keep them for a while and might comment them later -- -- -- -function string.tformat(fmt,...) - return format(lpegmatch(replacer,fmt),...) 
+-- roughly: copy-loop : unpack : sub == 0.9 : 0.4 : 0.45 (so in critical apps, use unpack) + +function table.sub(t,i,j) + return { unpack(t,i,j) } end --- strips leading and trailing spaces and collapsed all other spaces +-- slower than #t on indexed tables (#t only returns the size of the numerically indexed slice) + +function table.is_empty(t) + return not t or not next(t) +end -local pattern = Cs(whitespace^0/"" * ((whitespace^1 * P(-1) / "") + (whitespace^1/" ") + P(1))^0) +function table.has_one_entry(t) + return t and not next(t,next(t)) +end -function string.collapsespaces(str) - return lpegmatch(pattern,str) +-- new + +function table.loweredkeys(t) -- maybe utf + local l = { } + for k, v in next, t do + l[lower(k)] = v + end + return l +end + +-- new, might move (maybe duplicate) + +function table.unique(old) + local hash = { } + local new = { } + local n = 0 + for i=1,#old do + local oi = old[i] + if not hash[oi] then + n = n + 1 + new[n] = oi + hash[oi] = true + end + end + return new end +function table.sorted(t,...) + sort(t,...) 
+ return t -- still sorts in-place +end + + end -- closure do -- begin closure to overcome local limits and interference @@ -2205,25 +2190,25 @@ local suffix = period/"" * (1-period-slashes)^1 * -1 local pattern = C((noslashes^0 * slashes^1)^1) local function pathpart(name,default) - return lpegmatch(pattern,name) or default or "" + return name and lpegmatch(pattern,name) or default or "" end local pattern = (noslashes^0 * slashes)^1 * C(noslashes^1) * -1 local function basename(name) - return lpegmatch(pattern,name) or name + return name and lpegmatch(pattern,name) or name end local pattern = (noslashes^0 * slashes^1)^0 * Cs((1-suffix)^1) * suffix^0 local function nameonly(name) - return lpegmatch(pattern,name) or name + return name and lpegmatch(pattern,name) or name end local pattern = (noslashes^0 * slashes)^0 * (noperiod^1 * period)^1 * C(noperiod^1) * -1 local function suffixonly(name) - return lpegmatch(pattern,name) or "" + return name and lpegmatch(pattern,name) or "" end file.pathpart = pathpart @@ -2254,7 +2239,9 @@ local pattern_c = C(drive * path) * C(base * suffix) -- trick: two extra capture local pattern_d = path * rest function file.splitname(str,splitdrive) - if splitdrive then + if not str then + -- error + elseif splitdrive then return lpegmatch(pattern_a,str) -- returns drive, path, base, suffix else return lpegmatch(pattern_b,str) -- returns path, base, suffix @@ -2262,34 +2249,36 @@ function file.splitname(str,splitdrive) end function file.splitbase(str) - return lpegmatch(pattern_d,str) -- returns path, base+suffix + return str and lpegmatch(pattern_d,str) -- returns path, base+suffix end function file.nametotable(str,splitdrive) -- returns table - local path, drive, subpath, name, base, suffix = lpegmatch(pattern_c,str) - if splitdrive then - return { - path = path, - drive = drive, - subpath = subpath, - name = name, - base = base, - suffix = suffix, - } - else - return { - path = path, - name = name, - base = base, - suffix = suffix, - } 
+ if str then + local path, drive, subpath, name, base, suffix = lpegmatch(pattern_c,str) + if splitdrive then + return { + path = path, + drive = drive, + subpath = subpath, + name = name, + base = base, + suffix = suffix, + } + else + return { + path = path, + name = name, + base = base, + suffix = suffix, + } + end end end local pattern = Cs(((period * noperiod^1 * -1)/"" + 1)^1) function file.removesuffix(name) - return lpegmatch(pattern,name) + return name and lpegmatch(pattern,name) end -- local pattern = (noslashes^0 * slashes)^0 * (noperiod^1 * period)^1 * Cp() * noperiod^1 * -1 @@ -2306,8 +2295,8 @@ end local suffix = period/"" * (1-period-slashes)^1 * -1 local pattern = Cs((noslashes^0 * slashes^1)^0 * ((1-suffix)^1)) * Cs(suffix) -function file.addsuffix(filename, suffix, criterium) - if not suffix or suffix == "" then +function file.addsuffix(filename,suffix,criterium) + if not filename or not suffix or suffix == "" then return filename elseif criterium == true then return filename .. "." .. suffix @@ -2353,7 +2342,7 @@ local suffix = period * (1-period-slashes)^1 * -1 local pattern = Cs((1-suffix)^0) function file.replacesuffix(name,suffix) - if suffix and suffix ~= "" then + if name and suffix and suffix ~= "" then return lpegmatch(pattern,name) .. "." .. suffix else return name @@ -2362,10 +2351,10 @@ end -- -local reslasher = lpeg.replacer(S("\\"),"/") +local reslasher = lpeg.replacer(P("\\"),"/") function file.reslash(str) - return lpegmatch(reslasher,str) + return str and lpegmatch(reslasher,str) end -- We should be able to use: @@ -2381,7 +2370,9 @@ end -- variant: function file.is_writable(name) - if lfs.isdir(name) then + if not name then + -- error + elseif lfs.isdir(name) then name = name .. 
"/m_t_x_t_e_s_t.tmp" local f = io.open(name,"wb") if f then @@ -2409,24 +2400,32 @@ end local readable = P("r") * Cc(true) function file.is_readable(name) - local a = attributes(name) - return a and lpegmatch(readable,a.permissions) or false + if name then + local a = attributes(name) + return a and lpegmatch(readable,a.permissions) or false + else + return false + end end file.isreadable = file.is_readable -- depricated file.iswritable = file.is_writable -- depricated function file.size(name) - local a = attributes(name) - return a and a.size or 0 + if name then + local a = attributes(name) + return a and a.size or 0 + else + return 0 + end end function file.splitpath(str,separator) -- string .. reslash is a bonus (we could do a direct split) - return checkedsplit(lpegmatch(reslasher,str),separator or io.pathseparator) + return str and checkedsplit(lpegmatch(reslasher,str),separator or io.pathseparator) end function file.joinpath(tab,separator) -- table - return concat(tab,separator or io.pathseparator) -- can have trailing // + return tab and concat(tab,separator or io.pathseparator) -- can have trailing // end local stripper = Cs(P(fwslash)^0/"" * reslasher) @@ -2434,14 +2433,23 @@ local isnetwork = fwslash * fwslash * (1-fwslash) + (1-fwslash-colon)^1 * colon local isroot = fwslash^1 * -1 local hasroot = fwslash^1 -function file.join(...) -- rather dirty +local deslasher = lpeg.replacer(S("\\/")^1,"/") + +-- If we have a network or prefix then there is a change that we end up with two +-- // in the middle ... we could prevent this if we (1) expand prefixes: and (2) +-- split and rebuild as url. Of course we could assume no network paths (which +-- makes sense) adn assume either mapped drives (windows) or mounts (unix) but +-- then we still have to deal with urls ... anyhow, multiple // are never a real +-- problem but just ugly. + +function file.join(...) local lst = { ... 
} local one = lst[1] if lpegmatch(isnetwork,one) then - local two = lpegmatch(reslasher,concat(lst,"/",2)) + local two = lpegmatch(deslasher,concat(lst,"/",2)) return one .. "/" .. two elseif lpegmatch(isroot,one) then - local two = lpegmatch(reslasher,concat(lst,"/",2)) + local two = lpegmatch(deslasher,concat(lst,"/",2)) if lpegmatch(hasroot,two) then return two else @@ -2450,7 +2458,7 @@ function file.join(...) -- rather dirty elseif one == "" then return lpegmatch(stripper,concat(lst,"/",2)) else - return lpegmatch(reslasher,concat(lst,"/")) + return lpegmatch(deslasher,concat(lst,"/")) end end @@ -2479,6 +2487,9 @@ local splitstarter = (Cs(drivespec * (bwslash/"/" + fwslash)^0) + Cc(false)) * C local absolute = fwslash function file.collapsepath(str,anchor) + if not str then + return + end if anchor and not lpegmatch(anchors,str) then str = getcurrentdir() .. "/" .. str end @@ -2488,7 +2499,6 @@ function file.collapsepath(str,anchor) return lpegmatch(reslasher,str) end local starter, oldelements = lpegmatch(splitstarter,str) --- inspect(oldelements) local newelements = { } local i = #oldelements while i > 0 do @@ -2542,11 +2552,13 @@ local whatever = P("-")^0 / "" local pattern_b = Cs(whatever * (1 - whatever * -1)^1) function file.robustname(str,strict) - str = lpegmatch(pattern_a,str) or str - if strict then - return lpegmatch(pattern_b,str) or str -- two step is cleaner (less backtracking) - else - return str + if str then + str = lpegmatch(pattern_a,str) or str + if strict then + return lpegmatch(pattern_b,str) or str -- two step is cleaner (less backtracking) + else + return str + end end end @@ -2554,7 +2566,9 @@ file.readdata = io.loaddata file.savedata = io.savedata function file.copy(oldname,newname) - file.savedata(newname,io.loaddata(oldname)) + if oldname and newname then + file.savedata(newname,io.loaddata(oldname)) + end end -- also rewrite previous @@ -2575,11 +2589,11 @@ lpeg.patterns.rootbased = rootbased -- ./name ../name /name c: :// 
name/name function file.is_qualified_path(filename) - return lpegmatch(qualified,filename) ~= nil + return filename and lpegmatch(qualified,filename) ~= nil end function file.is_rootbased_path(filename) - return lpegmatch(rootbased,filename) ~= nil + return filename and lpegmatch(rootbased,filename) ~= nil end -- function test(t) for k, v in next, t do print(v, "=>", file.splitname(v)) end end @@ -2601,8 +2615,10 @@ end -- for myself: function file.strip(name,dir) - local b, a = match(name,"^(.-)" .. dir .. "(.*)$") - return a ~= "" and a or name + if name then + local b, a = match(name,"^(.-)" .. dir .. "(.*)$") + return a ~= "" and a or name + end end -- local debuglist = { @@ -2954,7 +2970,7 @@ function io.readstring(f,n,m) f:seek("set",n) n = m end - local str = gsub(f:read(n),"%z","") + local str = gsub(f:read(n),"\000","") return str end @@ -3544,11 +3560,8 @@ if not modules then modules = { } end modules ['font-con'] = { license = "see context related readme files" } - -- some names of table entries will be changed (no _) -local utf = unicode.utf8 - local next, tostring, rawget = next, tostring, rawget local format, match, lower, gsub = string.format, string.match, string.lower, string.gsub local utfbyte = utf.byte @@ -5651,8 +5664,6 @@ if not modules then modules = { } end modules ['font-otf'] = { -- more checking against low level calls of functions -local utf = unicode.utf8 - local utfbyte = utf.byte local format, gmatch, gsub, find, match, lower, strip = string.format, string.gmatch, string.gsub, string.find, string.match, string.lower, string.strip local type, next, tonumber, tostring = type, next, tonumber, tostring @@ -6092,32 +6103,32 @@ function otf.load(filename,format,sub,featurefile) starttiming(data) report_otf("file size: %s", size) enhancers.apply(data,filename,fontdata) + local packtime = { } if packdata then if cleanup > 0 then collectgarbage("collect") ---~ lua.collectgarbage() end + starttiming(packtime) enhance("pack",data,filename,nil) + 
stoptiming(packtime) end report_otf("saving in cache: %s",filename) data = containers.write(otf.cache, hash, data) if cleanup > 1 then collectgarbage("collect") ---~ lua.collectgarbage() end stoptiming(data) if elapsedtime then -- not in generic - report_otf("preprocessing and caching took %s seconds",elapsedtime(data)) + report_otf("preprocessing and caching took %s seconds (packtime: %s)", + elapsedtime(data),packdata and elapsedtime(packtime) or 0) end fontloader.close(fontdata) -- free memory if cleanup > 3 then collectgarbage("collect") ---~ lua.collectgarbage() end data = containers.read(otf.cache, hash) -- this frees the old table and load the sparse one if cleanup > 2 then collectgarbage("collect") ---~ lua.collectgarbage() end else data = nil @@ -6695,7 +6706,10 @@ actions["reorganize subtables"] = function(data,filename,raw) -- local name = gk.name -- - if features then + if not name then + -- in fact an error + report_otf("skipping weird lookup number %s",k) + elseif features then -- scripts, tag, ismac local f = { } for i=1,#features do @@ -7128,6 +7142,9 @@ actions["merge kern classes"] = function(data,filename,raw) if type(lookups) ~= "table" then lookups = { lookups } end + -- if offsets[1] == nil then + -- offsets[1] = "" + -- end -- we can check the max in the loop -- local maxseconds = getn(seconds) for n, s in next, firsts do @@ -7148,9 +7165,9 @@ actions["merge kern classes"] = function(data,filename,raw) if splt then local extrakerns = { } local baseoffset = (fk-1) * maxseconds - -- for sk=2,maxseconds do - -- local sv = seconds[sk] - for sk, sv in next, seconds do + for sk=2,maxseconds do -- will become 1 based in future luatex + local sv = seconds[sk] + -- for sk, sv in next, seconds do local splt = split[sv] if splt then -- redundant test local offset = offsets[baseoffset + sk] @@ -8377,7 +8394,7 @@ if not modules then modules = { } end modules ['node-inj'] = { -- This is very experimental (this will change when we have luatex > .50 and -- a 
few pending thingies are available. Also, Idris needs to make a few more -- test fonts. Btw, future versions of luatex will have extended glyph properties --- that can be of help. +-- that can be of help. Some optimizations can go away when we have faster machines. local next = next @@ -8864,7 +8881,7 @@ if not modules then modules = { } end modules ['font-ota'] = { -- this might become scrp-*.lua -local type, tostring, match, format, concat = type, tostring, string.match, string.format, table.concat +local type = type if not trackers then trackers = { register = function() end } end @@ -9249,6 +9266,8 @@ if not modules then modules = { } end modules ['font-otn'] = { -- handle positions (we need example fonts) -- handle gpos_single (we might want an extra width field in glyph nodes because adding kerns might interfere) -- mark (to mark) code is still not what it should be (too messy but we need some more extreem husayni tests) +-- remove some optimizations (when I have a faster machine) + --[[ldx--

This module is a bit more split up that I'd like but since we also want to test @@ -12601,7 +12620,6 @@ if not modules then modules = { } end modules ['font-def'] = { -- We can overload some of the definers.functions so we don't local them. -local concat = table.concat local format, gmatch, match, find, lower, gsub = string.format, string.gmatch, string.match, string.find, string.lower, string.gsub local tostring, next = tostring, next local lpegmatch = lpeg.match diff --git a/tex/generic/context/luatex/luatex-fonts.lua b/tex/generic/context/luatex/luatex-fonts.lua index dd2c902cd..6b502cd24 100644 --- a/tex/generic/context/luatex/luatex-fonts.lua +++ b/tex/generic/context/luatex/luatex-fonts.lua @@ -22,7 +22,7 @@ if not modules then modules = { } end modules ['luatex-fonts'] = { -- -- Todo: all global namespaces in called modules will get local shortcuts. -utf = unicode.utf8 +utf = utf or unicode.utf8 if not generic_context then @@ -132,10 +132,10 @@ else -- mess up ConTeXt code for the sake of general generality. Around -- version 1.0 there will be an official api defined. + loadmodule('l-lpeg.lua') loadmodule('l-function.lua') loadmodule('l-string.lua') loadmodule('l-table.lua') - loadmodule('l-lpeg.lua') loadmodule('l-boolean.lua') loadmodule('l-math.lua') loadmodule('l-file.lua') -- cgit v1.2.3