From 31c8bf0930a6d3e353a552d6bab71f62bc25f34e Mon Sep 17 00:00:00 2001
From: Marius
The next code is an adaptation of code from Wolfgang Schuster -as posted on the mailing list. This version supports nested -braces and unbraced integers as scripts. We could consider -spaces as terminals for them but first let collect a bunch -of input then.
+The next code started out as an adaptation of code from Wolfgang Schuster as +posted on the mailing list. The current version supports nested braces and +unbraced integers as scripts.
]]-- --- some lpeg, maybe i'll make an syst-lpg module - -local lowercase = R("az") -local uppercase = R("AZ") -local backslash = P("\\") -local csname = backslash * P(1) * (1-backslash)^0 -local plus = P("+") / "\\textplus " -local minus = P("-") / "\\textminus " -local digit = R("09") -local sign = plus + minus -local cardinal = digit^1 -local integer = sign^0 * cardinal - -local leftbrace = P("{") -local rightbrace = P("}") -local nobrace = 1 - (leftbrace + rightbrace) -local nested = P { leftbrace * (csname + sign + nobrace + V(1))^0 * rightbrace } -local any = P(1) - -local subscript = P("_") -local superscript = P("^") -local somescript = subscript + superscript - -local content = Cs(csname + nested + sign + any) - --- could be made more efficient - -local lowhigh = Cc("\\lohi{%s}{%s}") * subscript * content * superscript * content / format -local highlow = Cc("\\hilo{%s}{%s}") * superscript * content * subscript * content / format -local low = Cc("\\low{%s}") * subscript * content / format -local high = Cc("\\high{%s}") * superscript * content / format -local justtext = (1 - somescript)^1 -local parser = Cs((csname + lowhigh + highlow + low + high + sign + any)^0) - -chemistry.moleculeparser = parser -- can be used to avoid functioncall +local moleculeparser = cpatterns.scripted +chemistry.moleculeparser = moleculeparser function chemistry.molecule(str) - return lpegmatch(parser,str) + return lpegmatch(moleculeparser,str) end function commands.molecule(str) if trace_molecules then - local rep = lpegmatch(parser,str) + local rep = lpegmatch(moleculeparser,str) report_chemistry("molecule %s => %s",str,rep) context(rep) else - context(lpegmatch(parser,str)) + context(lpegmatch(moleculeparser,str)) end end diff --git a/tex/context/base/chem-str.lua b/tex/context/base/chem-str.lua index dc4bd746f..3ab2e53b6 100644 --- a/tex/context/base/chem-str.lua +++ b/tex/context/base/chem-str.lua @@ -21,24 +21,40 @@ if not modules then modules = { } end modules ['chem-str'] 
= { -- the current user interface is slightly different from the old one but hopefully users -- will like the added value. -local trace_structure = false trackers.register("chemistry.structure", function(v) trace_structure = v end) -local trace_metapost = false trackers.register("chemistry.metapost", function(v) trace_metapost = v end) -local trace_textstack = false trackers.register("chemistry.textstack", function(v) trace_textstack = v end) +-- directive_strictorder: one might set this to off when associated texts are disordered too + +local trace_structure = false trackers .register("chemistry.structure", function(v) trace_structure = v end) +local trace_metapost = false trackers .register("chemistry.metapost", function(v) trace_metapost = v end) +local trace_textstack = false trackers .register("chemistry.textstack", function(v) trace_textstack = v end) +local directive_strictorder = true directives.register("chemistry.strictorder", function(v) directive_strictorder = v end) +local directive_strictindex = false directives.register("chemistry.strictindex", function(v) directive_strictindex = v end) local report_chemistry = logs.reporter("chemistry") local format, gmatch, match, lower, gsub = string.format, string.gmatch, string.match, string.lower, string.gsub -local concat, insert, remove = table.concat, table.insert, table.remove +local concat, insert, remove, unique, sorted = table.concat, table.insert, table.remove, table.unique, table.sorted local processor_tostring = typesetters and typesetters.processors.tostring local settings_to_array = utilities.parsers.settings_to_array local settings_to_array_with_repeat = utilities.parsers.settings_to_array_with_repeat local lpegmatch = lpeg.match -local P, R, S, C, Cs, Ct, Cc = lpeg.P, lpeg.R, lpeg.S, lpeg.C, lpeg.Cs, lpeg.Ct, lpeg.Cc +local P, R, S, C, Cs, Ct, Cc, Cmt = lpeg.P, lpeg.R, lpeg.S, lpeg.C, lpeg.Cs, lpeg.Ct, lpeg.Cc, lpeg.Cmt local variables = interfaces and interfaces.variables local context = context 
+local v_default = variables.default +local v_small = variables.small +local v_medium = variables.medium +local v_big = variables.big +local v_normal = variables.normal +local v_fit = variables.fit +local v_on = variables.on + +local mpnamedcolor = attributes.colors.mpnamedcolor +local topoints = number.topoints +local todimen = string.todimen + chemistry = chemistry or { } local chemistry = chemistry @@ -47,39 +63,44 @@ chemistry.format = "metafun" chemistry.structures = 0 local common_keys = { - b = "line", - r = "line", - sb = "line", - sr = "line", - rd = "line", - rh = "line", - cc = "line", - ccd = "line", - line = "line", - dash = "line", - arrow = "line", - c = "fixed", - cd = "fixed", - z = "text", - zt = "text", - zlt = "text", - zrt = "text", - rz = "text", - rt = "text", - lrt = "text", - rrt = "text", - zln = "number", - zrn = "number", - rn = "number", - lrn = "number", - rrn = "number", - zn = "number", - mov = "transform", - mark = "transform", - move = "transform", - off = "transform", - adj = "transform", - sub = "transform", + b = "line", + r = "line", + sb = "line", + sr = "line", + rd = "line", + rh = "line", + rb = "line", + rbd = "line", + cc = "line", + ccd = "line", + line = "line", + dash = "line", + arrow = "line", + c = "fixed", + cd = "fixed", + z = "text", + zt = "text", + zlt = "text", + zrt = "text", + rz = "text", + rt = "text", + lrt = "text", + rrt = "text", + label = "text", + zln = "number", + zrn = "number", + rn = "number", + lrn = "number", + rrn = "number", + zn = "number", + number = "number", + mov = "transform", + mark = "transform", + move = "transform", + diff = "transform", + off = "transform", + adj = "transform", + sub = "transform", } local front_keys = { @@ -89,6 +110,14 @@ local front_keys = { lr = "line", lsr = "line", rsr = "line", + lrd = "line", + rrd = "line", + lrh = "line", + rrh = "line", + lrbd = "line", + rrbd = "line", + lrb = "line", + rrb = "line", lrz = "text", rrz = "text", lsub = "transform", @@ 
-99,15 +128,14 @@ local one_keys = { db = "line", tb = "line", bb = "line", - rb = "line", dr = "line", hb = "line", bd = "line", bw = "line", oe = "line", sd = "line", - ld = "line", - rd = "line", + rdb = "line", + ldb = "line", ldd = "line", rdd = "line", ep = "line", @@ -130,9 +158,12 @@ local ring_keys = { rsr = "line", lrd = "line", rrd = "line", - rb = "line", lrb = "line", rrb = "line", + lrh = "line", + rrh = "line", + lrbd = "line", + rrbd = "line", dr = "line", eb = "line", er = "line", @@ -199,7 +230,10 @@ local syntax = { mp = { direct = '%s', arguments = 1 }, -- backdoor MP code - dangerous! } -local definitions = { } +chemistry.definitions = chemistry.definitions or { } +local definitions = chemistry.definitions + +storage.register("chemistry/definitions",definitions,"chemistry.definitions") function chemistry.undefine(name) definitions[lower(name)] = nil @@ -218,7 +252,7 @@ function chemistry.define(name,spec,text) } end -local metacode, variant, keys, max, txt, pstack, sstack +local metacode, variant, keys, max, txt, pstack, sstack, align local molecule = chemistry.molecule -- or use lpegmatch(chemistry.moleculeparser,...) 
local function fetch(txt) @@ -260,10 +294,10 @@ local special = (colon * C(other^1)) + Cc("") local text = (equal * C(P(1)^0)) + Cc(false) local pattern = - (amount + Cc(1)) * - (remapped + Cc("")) * - Cs(operation/lower) * - Cs(special/lower) * ( + (amount + Cc(1)) + * (remapped + Cc("")) + * Cs(operation/lower) + * Cs(special/lower) * ( range * Cc(false) * text + Cc(false) * Cc(false) * set * text + single * Cc(false) * Cc(false) * text + @@ -278,43 +312,88 @@ local pattern = -- print(lpegmatch(pattern,"RZ13=x")) -- 1 RZ false false table x local t_initialize = 'if unknown context_chem : input mp-chem.mpiv ; fi ;' -local t_start_structure = 'chem_start_structure(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s);' +local t_start_structure = 'chem_start_structure(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s);' local t_stop_structure = 'chem_stop_structure;' local t_start_component = 'chem_start_component;' local t_stop_component = 'chem_stop_component;' local t_line = 'chem_%s%s(%s,%s,%s,%s,%s);' local t_set = 'chem_set(%s);' -local t_number = 'chem_%s(%s,%s,"\\chemicaltext{%s}");' +local t_number = 'chem_%s%s(%s,%s,"\\chemicaltext{%s}");' local t_text = t_number local t_empty_normal = 'chem_%s(%s,%s,"");' local t_empty_center = 'chem_c%s(%s,%s,"");' local t_transform = 'chem_%s(%s,%s,%s);' -local function process(spec,text,n,rulethickness,rulecolor,offset) - insert(stack,{ spec=spec, text=text, n=n }) +local prepareMPvariable = commands and commands.prepareMPvariable + +local function process(level,spec,text,n,rulethickness,rulecolor,offset,default_variant) + insert(stack,{ spec = spec, text = text, n = n }) local txt = #stack local m = #metacode + local saved_rulethickness = rulethickness + local saved_rulecolor = rulecolor + local saved_align = align + local current_variant = default_variant or "six" for i=1,#spec do local step = spec[i] local s = lower(step) - local d = definitions[s] + local n = current_variant .. ":" .. 
s + local d = definitions[n] + if not d then + n = s + d = definitions[n] + end if d then if trace_structure then - report_chemistry("%s => definition: %s",step,s) + report_chemistry("%s > %s => definition: %s (%s snippets)",level,step,n,#d) end for i=1,#d do local di = d[i] - process(di.spec,di.text,1,rulethickness,rulecolor) -- offset? + current_variant = process(level+1,di.spec,di.text,1,rulethickness,rulecolor,offset,current_variant) -- offset? end else - --~local rep, operation, special, index, upto, set, text = lpegmatch(pattern,step) local factor, osign, operation, special, index, upto, set, text = lpegmatch(pattern,step) if trace_structure then local set = set and concat(set," ") or "-" - report_chemistry("%s => factor: %s, osign: %s operation: %s, special: %s, index: %s, upto: %s, set: %s, text: %s", - step,factor or "",osign or "",operation or "-",special and special ~= "" or "-",index or "-",upto or "-",set or "-",text or "-") + report_chemistry("%s > %s => factor: %s, osign: %s operation: %s, special: %s, index: %s, upto: %s, set: %s, text: %s", + level,step,factor or "",osign or "",operation or "-",special and special ~= "" or "-",index or "-",upto or "-",set or "-",text or "-") end - if operation == "pb" then + if operation == "rulecolor" then + local t = text + if not t then + txt, t = fetch(txt) + end + if t == v_default or t == v_normal or t == "" then + rulecolor = saved_rulecolor + elseif t then + rulecolor = mpnamedcolor(t) + end + elseif operation == "rulethickness" then + local t = text + if not t then + txt, t = fetch(txt) + end + if t == v_default or t == v_normal or t == v_medium or t == "" then + rulethickness = saved_rulethickness + elseif t == v_small then + rulethickness = topoints(1/1.2 * todimen(saved_rulethickness)) + elseif t == v_big then + rulethickness = topoints(1.2 * todimen(saved_rulethickness)) + elseif t then + -- rulethickness = topoints(todimen(t)) -- mp can't handle sp + rulethickness = topoints(tonumber(t) * 
todimen(saved_rulethickness)) + end + elseif operation == "symalign" then + local t = text + if not t then + txt, t = fetch(txt) + end + if t == v_default or t == v_normal then + align = saved_align + elseif t and t ~= "" then + align = "." .. t + end + elseif operation == "pb" then insert(pstack,variant) m = m + 1 ; metacode[m] = syntax.pb.direct if keys[special] == "text" and index then @@ -330,6 +409,7 @@ local function process(spec,text,n,rulethickness,rulecolor,offset) keys, max = ss.keys, ss.max m = m + 1 ; metacode[m] = syntax[operation].direct m = m + 1 ; metacode[m] = format(t_set,variant) + current_variant = variant elseif operation == "save" then insert(sstack,variant) m = m + 1 ; metacode[m] = syntax.save.direct @@ -339,8 +419,52 @@ local function process(spec,text,n,rulethickness,rulecolor,offset) keys, max = ss.keys, ss.max m = m + 1 ; metacode[m] = syntax[operation].direct m = m + 1 ; metacode[m] = format(t_set,variant) + current_variant = variant elseif operation then local ss = syntax[operation] + local what = keys[operation] + local ns = 0 + if set then + local sv = syntax[current_variant] + local ms = sv and sv.max + set = unique(set) + ns = #set + if directive_strictorder then + if what == "line" then + set = sorted(set) + end + if directive_strictindex and ms then + for i=ns,1,-1 do + local si = set[i] + if si > ms then + report_chemistry("%s > operation %s: limited to %s steps, ignoring %s", + level,operation,ms,si) + set[i] = nil + ns = ns - 1 + else + break + end + end + end + else + if directive_strictindex and ms then + local t, nt = { }, 0 + for i=1,ns do + local si = set[i] + if si > ms then + report_chemistry("%s > operation %s: limited to %s steps, ignoring %s", + level,operation,ms,si) + set[i] = nil + else + nt = nt + 1 + t[nt] = si + end + end + ns = nt + set = t + end + end + end if ss then local ds = ss.direct if ds then @@ -348,7 +472,7 @@ local function process(spec,text,n,rulethickness,rulecolor,offset) if sa == 1 then local 
one ; txt, one = fetch(txt) m = m + 1 ; metacode[m] = format(ds,one or "") - elseif sa ==2 then + elseif sa == 2 then local one ; txt, one = fetch(txt) local two ; txt, two = fetch(txt) m = m + 1 ; metacode[m] = format(ds,one or "",two or "") @@ -358,118 +482,120 @@ local function process(spec,text,n,rulethickness,rulecolor,offset) elseif ss.keys then variant, keys, max = s, ss.keys, ss.max m = m + 1 ; metacode[m] = format(t_set,variant) + current_variant = variant end - else - local what = keys[operation] - if what == "line" then - local s = osign - if s ~= "" then s = "." .. s end - if set then - -- condense consecutive numbers in a set to a range - -- (numbers modulo max are currently not dealt with...) - table.sort(set) - local sf, st = set[1] - for i=1,#set do - if i > 1 and set[i] ~= set[i-1]+1 then - m = m + 1 ; metacode[m] = format(t_line,operation,s,variant,sf,st,rulethickness,rulecolor) - sf = set[i] - end - st = set[i] + elseif what == "line" then + local s = osign + if s ~= "" then + s = "." .. 
s + end + if set then + -- condense consecutive numbers in a set to a range + local sf, st = set[1] + for i=1,ns do + if i > 1 and set[i] ~= set[i-1]+1 then + m = m + 1 ; metacode[m] = format(t_line,operation,s,variant,sf,st,rulethickness,rulecolor) + sf = set[i] end - m = m + 1 ; metacode[m] = format(t_line,operation,s,variant,sf,st,rulethickness,rulecolor) - elseif upto then - m = m + 1 ; metacode[m] = format(t_line,operation,s,variant,index,upto,rulethickness,rulecolor) - elseif index then - m = m + 1 ; metacode[m] = format(t_line,operation,s,variant,index,index,rulethickness,rulecolor) - else - m = m + 1 ; metacode[m] = format(t_line,operation,s,variant,1,max,rulethickness,rulecolor) + st = set[i] end - elseif what == "number" then - if set then - for i=1,#set do - local si = set[i] - m = m + 1 ; metacode[m] = format(t_number,operation,variant,si,si) - end - elseif upto then - for i=index,upto do - local si = set[i] - m = m + 1 ; metacode[m] = format(t_number,operation,variant,si,si) - end - elseif index then - m = m + 1 ; metacode[m] = format(t_number,operation,variant,index,index) - else - for i=1,max do - m = m + 1 ; metacode[m] = format(t_number,operation,variant,i,i) - end + m = m + 1 ; metacode[m] = format(t_line,operation,s,variant,sf,st,rulethickness,rulecolor) + elseif upto then + m = m + 1 ; metacode[m] = format(t_line,operation,s,variant,index,upto,rulethickness,rulecolor) + elseif index then + m = m + 1 ; metacode[m] = format(t_line,operation,s,variant,index,index,rulethickness,rulecolor) + else + m = m + 1 ; metacode[m] = format(t_line,operation,s,variant,1,max,rulethickness,rulecolor) + end + elseif what == "number" then + if set then + for i=1,ns do + local si = set[i] + m = m + 1 ; metacode[m] = format(t_number,operation,align,variant,si,si) end - elseif what == "text" then - if set then - for i=1,#set do - local si = set[i] - local t = text - if not t then txt, t = fetch(txt) end - if t then - t = molecule(processor_tostring(t)) - m = m + 1 ; 
metacode[m] = format(t_text,operation,variant,si,t) - end - end - elseif upto then - for i=index,upto do - local t = text - if not t then txt, t = fetch(txt) end - if t then - t = molecule(processor_tostring(t)) - m = m + 1 ; metacode[m] = format(t_text,operation,variant,i,t) - end - end - elseif index == 0 then + elseif upto then + for i=index,upto do + local si = i + m = m + 1 ; metacode[m] = format(t_number,operation,align,variant,si,si) + end + elseif index then + m = m + 1 ; metacode[m] = format(t_number,operation,align,variant,index,index) + else + for i=1,max do + m = m + 1 ; metacode[m] = format(t_number,operation,align,variant,i,i) + end + end + elseif what == "text" then + if set then + for i=1,ns do + local si = set[i] local t = text if not t then txt, t = fetch(txt) end if t then t = molecule(processor_tostring(t)) - m = m + 1 ; metacode[m] = format(t_text,operation,variant,index,t) + m = m + 1 ; metacode[m] = format(t_text,operation,align,variant,si,t) end - elseif index then + end + elseif upto then + for i=index,upto do local t = text if not t then txt, t = fetch(txt) end if t then t = molecule(processor_tostring(t)) - m = m + 1 ; metacode[m] = format(t_text,operation,variant,index,t) - end - else - for i=1,max do - local t = text - if not t then txt, t = fetch(txt) end - if t then - t = molecule(processor_tostring(t)) - m = m + 1 ; metacode[m] = format(t_text,operation,variant,i,t) - end + m = m + 1 ; metacode[m] = format(t_text,operation,align,variant,i,t) end end - elseif what == "transform" then - if osign == "m" then factor = -factor end - if set then - for i=1,#set do - local si = set[i] - m = m + 1 ; metacode[m] = format(t_transform,operation,variant,si,factor) - end - elseif upto then - for i=index,upto do - m = m + 1 ; metacode[m] = format(t_transform,operation,variant,i,factor) + elseif index == 0 then + local t = text + if not t then txt, t = fetch(txt) end + if t then + t = molecule(processor_tostring(t)) + m = m + 1 ; metacode[m] = 
format(t_text,operation,align,variant,index,t) + end + elseif index then + local t = text + if not t then txt, t = fetch(txt) end + if t then + t = molecule(processor_tostring(t)) + m = m + 1 ; metacode[m] = format(t_text,operation,align,variant,index,t) + end + else + for i=1,max do + local t = text + if not t then txt, t = fetch(txt) end + if t then + t = molecule(processor_tostring(t)) + m = m + 1 ; metacode[m] = format(t_text,operation,align,variant,i,t) end - else - m = m + 1 ; metacode[m] = format(t_transform,operation,variant,index or 1,factor) end - elseif what == "fixed" then - m = m + 1 ; metacode[m] = format(t_transform,operation,variant,rulethickness,rulecolor) - elseif trace_structure then - report_chemistry("warning: undefined operation %s ignored here", operation or "") end + elseif what == "transform" then + if osign == "m" then + factor = -factor + end + if set then + for i=1,ns do + local si = set[i] + m = m + 1 ; metacode[m] = format(t_transform,operation,variant,si,factor) + end + elseif upto then + for i=index,upto do + m = m + 1 ; metacode[m] = format(t_transform,operation,variant,i,factor) + end + else + m = m + 1 ; metacode[m] = format(t_transform,operation,variant,index or 1,factor) + end + elseif what == "fixed" then + m = m + 1 ; metacode[m] = format(t_transform,operation,variant,rulethickness,rulecolor) + elseif trace_structure then + report_chemistry("%s > warning: undefined operation %s ignored here", + level, operation or "") end end end end remove(stack) + return current_variant end -- the size related values are somewhat special but we want to be @@ -480,19 +606,22 @@ end function chemistry.start(settings) chemistry.structures = chemistry.structures + 1 local emwidth, rulethickness, rulecolor, axiscolor = settings.emwidth, settings.rulethickness, settings.rulecolor, settings.framecolor - local width, height, scale, offset = settings.width or 0, settings.height or 0, settings.scale or "normal", settings.offset or 0 + local width, 
height, scale, rotation, offset = settings.width or 0, settings.height or 0, settings.scale or "normal", settings.rotation or 0, settings.offset or 0 local l, r, t, b = settings.left or 0, settings.right or 0, settings.top or 0, settings.bottom or 0 -- metacode = { } -- + align = settings.symalign or "auto" if trace_structure then - report_chemistry("scale: %s, width: %s, height: %s, l: %s, r: %s, t: %s, b: %s", scale, width, height, l, r, t, b) + report_chemistry("scale: %s, rotation: %s, width: %s, height: %s, l: %s, r: %s, t: %s, b: %s", scale, rotation, width, height, l, r, t, b) + report_chemistry("symalign: %s", align) end - if scale == variables.small then + if align ~= "" then align = "." .. align end + if scale == v_small then scale = 1/1.2 - elseif scale == variables.normal or scale == variables.medium or scale == 0 then + elseif scale == v_normal or scale == v_medium or scale == 0 then scale = 1 - elseif scale == variables.big then + elseif scale == v_big then scale = 1.2 else scale = tonumber(scale) @@ -504,7 +633,7 @@ function chemistry.start(settings) scale = .01 end end - if width == variables.fit then + if width == v_fit then width = true else width = tonumber(width) or 0 @@ -529,7 +658,7 @@ function chemistry.start(settings) end width = false end - if height == variables.fit then + if height == v_fit then height = true else height = tonumber(height) or 0 @@ -554,12 +683,13 @@ function chemistry.start(settings) end height = false end + rotation = tonumber(rotation) or 0 -- metacode[#metacode+1] = format(t_start_structure, chemistry.structures, - l, r, t, b, scale, + l, r, t, b, scale, rotation, tostring(width), tostring(height), tostring(emwidth), tostring(offset), - tostring(settings.axis == variables.on), tostring(rulethickness), tostring(axiscolor) + tostring(settings.axis == v_on), tostring(rulethickness), tostring(axiscolor) ) -- variant, keys, stack, pstack, sstack = "one", { }, { }, { }, { } @@ -590,7 +720,7 @@ function 
chemistry.component(spec,text,settings) local text = settings_to_array_with_repeat(text,true) -- inspect(spec) metacode[#metacode+1] = t_start_component - process(spec,text,1,rulethickness,rulecolor) -- offset? + process(1,spec,text,1,rulethickness,rulecolor) -- offset? metacode[#metacode+1] = t_stop_component end diff --git a/tex/context/base/chem-str.mkiv b/tex/context/base/chem-str.mkiv index 205675c46..cb840ed80 100644 --- a/tex/context/base/chem-str.mkiv +++ b/tex/context/base/chem-str.mkiv @@ -141,6 +141,7 @@ \def\chem_start[#1][#2]% {\ifmmode\vcenter\else\vbox\fi \bgroup + \synchronizestrut{\chemicalparameter\c!strut}% \dontcomplain \settrue\indisplaychemical \forgetall @@ -164,6 +165,8 @@ top = \chemicalparameter\c!top, bottom = \chemicalparameter\c!bottom, scale = "\chemicalparameter\c!scale", + rotation = "\chemicalparameter\c!rotation", + symalign = "\chemicalparameter\c!symalign", axis = "\chemicalparameter\c!axis", framecolor = "\MPcolor{\chemicalparameter\c!framecolor}", rulethickness = "\the\dimexpr\chemicalparameter\c!rulethickness\relax", @@ -358,13 +361,28 @@ % \let\chemicalsmashedleft \chemicalleftcentered % \let\chemicalsmashedright \chemicalrightcentered -\unexpanded\def\chemicalalignedtext#1#2#3% +\unexpanded\def\chemicalalignedtext + {\ifmmode + \expandafter\chem_aligned_text_math + \else + \expandafter\chem_aligned_text_text + \fi} + +\def\chem_aligned_text_text#1#2#3% {\dontleavehmode \begingroup \usechemicalstyleandcolor\c!style\c!color \hbox to \fontcharwd\font`C{\setstrut\strut#1\molecule{#3}#2}% \endgroup} +\def\chem_aligned_text_math#1#2#3% + {\dontleavehmode + \begingroup + \scratchcounter\normalmathstyle + \usechemicalstyleandcolor\c!style\c!color + \hbox to \fontcharwd\font`C{\setstrut\strut#1\mathematics{\tf\triggermathstyle\scratchcounter\molecule{#3}}#2}% + \endgroup} + \unexpanded\def\chemicalcentered {\chemicalalignedtext\hss \hss } \unexpanded\def\chemicalleftcentered {\chemicalalignedtext\relax\hss } 
\unexpanded\def\chemicalrightcentered{\chemicalalignedtext\hss \relax} @@ -436,18 +454,37 @@ % inline +% \unexpanded\def\chemical +% {\ifinformula +% \expandafter\displaychemical +% \else +% \expandafter\inlinechemical +% \fi} + \unexpanded\def\chemical {\ifinformula - \expandafter\displaychemical + \expandafter\indisplaychemical \else \expandafter\inlinechemical \fi} +\unexpanded\def\indisplaychemical + {\mathstylecommand\displaychemical\inlinechemical\inlinechemical} + +\unexpanded\def\inlinechemical#1% + {\dontleavehmode + \begingroup + \scratchcounter\normalmathstyle + \usechemicalstyleandcolor\c!style\c!color + \hbox{\mathematics{\tf\triggermathstyle\scratchcounter\ctxcommand{inlinechemical(\!!bs#1\!!es)}}}% + \endgroup} + \unexpanded\def\displaychemical {\dotriplegroupempty\chem_display} -\def\chem_display#1#2#3% todo: - {\the\everychemical \everychemical\emptytoks +\def\chem_display#1#2#3% + {\the\everychemical + \everychemical\emptytoks \quad \vcenter\bgroup \usechemicalstyleandcolor\c!style\c!color @@ -644,28 +681,50 @@ \c!offset=\v!overlay, \c!frame=\v!off] -\definecolor [lightblue] [h=add8e6] % a nice X11 color +\definecolor % private color + [chemicalframecolor] + [r=.75,g=.85,b=.95] \setupchemical [\c!frame=, \c!width=\v!fit, % or unitless number, multiplies scale*EmWidth \c!height=\v!fit, % or unitless number, multiplies scale*EmWidth - \c!left=0, % or unitless number, multiplies scale*EmWidth - \c!right=0, % or unitless number, multiplies scale*EmWidth - \c!top=0, % or unitless number, multiplies scale*EmWidth - \c!bottom=0, % or unitless number, multiplies scale*EmWidth + \c!left=0, % unitless number, multiplies scale*EmWidth + \c!right=0, % unitless number, multiplies scale*EmWidth + \c!top=0, % unitless number, multiplies scale*EmWidth + \c!bottom=0, % unitless number, multiplies scale*EmWidth \c!bodyfont=, - \c!scale=\v!normal, % small, normal or medium, big, unitless number (multiplies EmWidth) + \c!scale=\v!normal, % small, normal or medium, 
big, or unitless number (multiplies EmWidth) \c!size=\v!medium, \c!textsize=\v!big, % how is textsize used?? \c!axis=\v!off, \c!style=\rm, - \c!location=, + \c!rotation=0, % unitless number (interpreted as degrees) + \c!symalign=\v!auto, + \c!location=, % not yet used (was interaction related in mkii) \c!offset=.25em, \c!color=, - \c!framecolor=lightblue, + \c!strut=\v!yes, + \c!framecolor=chemicalframecolor, \c!rulethickness=0.6pt, %1.5\linewidth, \c!rulecolor=, \c!factor=1] % how is factor used?? +%D Compatibility: + +\definechemical[+R] {\chemical[RR]} +\definechemical[-R] {\chemical[LR]} + +\definechemical[CARBON:CB] {\chemical[NEWMANSTAGGER,C,SB]} +\definechemical[NEWMANSTAGGER:CB] {\chemical[NEWMANSTAGGER,C,SB]} +\definechemical[NEWMANECLIPSED:CB]{\chemical[NEWMANECLIPSED,C,SB]} +\definechemical[CARBON:CB1] {\chemical[CARBON,C,SB,Z234,1.5MOV1,MIR0,C,SB,Z234]} + +\definechemical[NEWMAN] {\chemical[]} +\definechemical[STAGGER] {\chemical[NEWMANSTAGGER]} +\definechemical[ECLIPSE] {\chemical[NEWMANECLIPSED]} +\definechemical[ECLIPSED] {\chemical[NEWMANECLIPSED]} +\definechemical[SIX:FRONT] {\chemical[SIXFRONT]} +\definechemical[FIVE:FRONT] {\chemical[FIVEFRONT]} + \protect \endinput diff --git a/tex/context/base/cldf-bas.mkiv b/tex/context/base/cldf-bas.mkiv index f2bd05177..f8b5b5d6a 100644 --- a/tex/context/base/cldf-bas.mkiv +++ b/tex/context/base/cldf-bas.mkiv @@ -14,5 +14,6 @@ \writestatus{loading}{ConTeXt Lua Documents / Basics} \registerctxluafile{cldf-bas}{1.001} +\registerctxluafile{cldf-prs}{1.001} \endinput diff --git a/tex/context/base/cldf-com.lua b/tex/context/base/cldf-com.lua index d9062594e..fa0dbed3e 100644 --- a/tex/context/base/cldf-com.lua +++ b/tex/context/base/cldf-com.lua @@ -17,9 +17,8 @@ generics.stoptabulate = "stoptabulate" -- "stop" .. variables.tabulate -- tod local NC, NR = context.NC, context.NR local function tabulaterow(how,...) - local t = { ... } - for i=1,#t do - local ti = tostring(t[i]) + for i=1,select("#",...) 
do + local ti = tostring(select(i,...)) NC() if how then context[how](ti) diff --git a/tex/context/base/cldf-ini.lua b/tex/context/base/cldf-ini.lua index 84ae7314e..b045282b1 100644 --- a/tex/context/base/cldf-ini.lua +++ b/tex/context/base/cldf-ini.lua @@ -25,10 +25,10 @@ local tex = tex context = context or { } local context = context -local format, find, gmatch, gsub, validstring = string.format, string.find, string.gmatch, string.gsub, string.valid +local format, gsub, validstring = string.format, string.gsub, string.valid local next, type, tostring, tonumber, setmetatable = next, type, tostring, tonumber, setmetatable local insert, remove, concat = table.insert, table.remove, table.concat -local lpegmatch, lpegC, lpegS, lpegP, lpegCc = lpeg.match, lpeg.C, lpeg.S, lpeg.P, lpeg.Cc +local lpegmatch, lpegC, lpegS, lpegP, lpegCc, patterns = lpeg.match, lpeg.C, lpeg.S, lpeg.P, lpeg.Cc, lpeg.patterns local texsprint = tex.sprint local textprint = tex.tprint @@ -162,8 +162,8 @@ context.popcatcodes = popcatcodes --~ content / texsprint --~ )^0 -local newline = lpeg.patterns.newline -local space = lpeg.patterns.spacer +local newline = patterns.newline +local space = patterns.spacer local spacing = newline * space^0 local content = lpegC((1-spacing)^1) -- texsprint local emptyline = space^0 * newline^2 -- texprint("") @@ -357,6 +357,8 @@ end -- -- -- +local containseol = patterns.containseol + local function writer(parent,command,first,...) -- already optimized before call local t = { first, ... } flush(currentcatcodes,command) -- todo: ctx|prt|texcatcodes @@ -377,7 +379,7 @@ local function writer(parent,command,first,...) -- already optimized before call flush(currentcatcodes,"{}") elseif typ == "string" then -- is processelines seen ? 
- if processlines and find(ti,"[\n\r]") then -- we can check for ti == "\n" + if processlines and lpegmatch(containseol,ti) then flush(currentcatcodes,"{") local flushlines = parent.__flushlines or flushlines flushlines(ti) @@ -529,7 +531,7 @@ local function caller(parent,f,a,...) if typ == "string" then if a then flush(contentcatcodes,format(f,a,...)) -- was currentcatcodes - elseif processlines and find(f,"[\n\r]") then + elseif processlines and lpegmatch(containseol,f) then local flushlines = parent.__flushlines or flushlines flushlines(f) else @@ -548,10 +550,9 @@ local function caller(parent,f,a,...) if f then if a ~= nil then local flushlines = parent.__flushlines or flushlines - flushlines(f) - -- ignore ... maybe some day + flushlines(a) else - flushdirect(currentcatcodes,"\r") + flushdirect(currentcatcodes,"\n") -- no \r, else issues with \startlines ... use context.par() otherwise end else if a ~= nil then @@ -635,6 +636,11 @@ local currenttrace = nil local nofwriters = 0 local nofflushes = 0 +local visualizer = lpeg.replacer { + { "\n","<This module is a bit more split up that I'd like but since we also want to test diff --git a/tex/context/base/font-otp.lua b/tex/context/base/font-otp.lua index 6c9827de8..fc98b2bdc 100644 --- a/tex/context/base/font-otp.lua +++ b/tex/context/base/font-otp.lua @@ -7,11 +7,14 @@ if not modules then modules = { } end modules ['font-otp'] = { } -- todo: pack math (but not that much to share) +-- pitfall 5.2: hashed tables can suddenly become indexed with nil slots local next, type = next, type local sort, concat = table.sort, table.concat +local trace_packing = false trackers.register("otf.packing", function(v) trace_packing = v end) local trace_loading = false trackers.register("otf.loading", function(v) trace_loading = v end) + local report_otf = logs.reporter("fonts","otf loading") -- also used in other scripts so we need to check some tables: @@ -33,28 +36,68 @@ otf.glists = glists local criterium = 1 local threshold = 
0 -local function tabstr(t) - local s, n = { }, 0 +local function tabstr_normal(t) + local s = { } + local n = 0 for k, v in next, t do n = n + 1 if type(v) == "table" then - s[n] = k .. "={" .. tabstr(v) .. "}" + s[n] = k .. ">" .. tabstr_normal(v) elseif v == true then - s[n] = k .. "=true" + s[n] = k .. "+" -- "=true" elseif v then s[n] = k .. "=" .. v else - s[n] = k .. "=false" + s[n] = k .. "-" -- "=false" end end - if n == 1 then + if n == 0 then + return "" + elseif n == 1 then return s[1] else - sort(s) + sort(s) -- costly but needed (occasional wrong hit otherwise) return concat(s,",") end end +local function tabstr_flat(t) + local s = { } + local n = 0 + for k, v in next, t do + n = n + 1 + s[n] = k .. "=" .. v + end + if n == 0 then + return "" + elseif n == 1 then + return s[1] + else + sort(s) -- costly but needed (occasional wrong hit otherwise) + return concat(s,",") + end +end + +local function tabstr_boolean(t) + local s = { } + local n = 0 + for k, v in next, t do + n = n + 1 + if v then + s[n] = k .. "+" + else + s[n] = k .. 
"-" + end + end + if n == 0 then + return "" + elseif n == 1 then + return s[1] + else + sort(s) -- costly but needed (occasional wrong hit otherwise) + return concat(s,",") + end +end -- -- saves only a few tens of bytes -- @@ -74,41 +117,83 @@ local function packdata(data) local h, t, c = { }, { }, { } local hh, tt, cc = { }, { }, { } local nt, ntt = 0, 0 - local function pack_1(v,indexed) - -- v == table - local tag = indexed and concat(v," ") or tabstr(v) + local function pack_normal(v) + local tag = tabstr_normal(v,flat) + local ht = h[tag] + if ht then + c[ht] = c[ht] + 1 + return ht + else + nt = nt + 1 + t[nt] = v + h[tag] = nt + c[nt] = 1 + return nt + end + end + local function pack_flat(v) + local tag = tabstr_flat(v) local ht = h[tag] - if not ht then + if ht then + c[ht] = c[ht] + 1 + return ht + else nt = nt + 1 - ht = nt - t[ht] = v - h[tag] = ht - c[ht] = 1 + t[nt] = v + h[tag] = nt + c[nt] = 1 + return nt + end + end + local function pack_boolean(v) + local tag = tabstr_boolean(v) + local ht = h[tag] + if ht then + c[ht] = c[ht] + 1 + return ht else + nt = nt + 1 + t[nt] = v + h[tag] = nt + c[nt] = 1 + return nt + end + end + local function pack_indexed(v) + local tag = concat(v," ") + local ht = h[tag] + if ht then c[ht] = c[ht] + 1 + return ht + else + nt = nt + 1 + t[nt] = v + h[tag] = nt + c[nt] = 1 + return nt end - return ht end - local function pack_2(v,indexed) + local function pack_final(v) -- v == number if c[v] <= criterium then return t[v] else -- compact hash local hv = hh[v] - if not hv then + if hv then + return hv + else ntt = ntt + 1 - hv = ntt - tt[hv] = t[v] - hh[v] = hv - cc[hv] = c[v] + tt[ntt] = t[v] + hh[v] = ntt + cc[ntt] = c[v] + return ntt end - return hv end end local function success(stage,pass) if nt == 0 then - if trace_loading then + if trace_loading or trace_packing then report_otf("pack quality: nothing to pack") end return false @@ -136,35 +221,45 @@ local function packdata(data) end data.tables = tt end - if 
trace_loading then + if trace_loading or trace_packing then report_otf("pack quality: stage %s, pass %s, %s packed, 1-10:%s, 11-20:%s, rest:%s (criterium: %s)", stage, pass, one+two+rest, one, two, rest, criterium) end return true else - if trace_loading then + if trace_loading or trace_packing then report_otf("pack quality: stage %s, pass %s, %s packed, aborting pack (threshold: %s)", stage, pass, nt, threshold) end return false end end + local function packers(pass) + if pass == 1 then + return pack_normal, pack_indexed, pack_flat, pack_boolean + else + return pack_final, pack_final, pack_final, pack_final + end + end local resources = data.resources local lookuptypes = resources.lookuptypes for pass=1,2 do - local pack = (pass == 1 and pack_1) or pack_2 + if trace_packing then + report_otf("start packing: stage 1, pass %s",pass) + end + local pack_normal, pack_indexed, pack_flat, pack_boolean = packers(pass) for unicode, description in next, data.descriptions do local boundingbox = description.boundingbox if boundingbox then - description.boundingbox = pack(boundingbox,true) + description.boundingbox = pack_indexed(boundingbox) end local slookups = description.slookups if slookups then for tag, slookup in next, slookups do local what = lookuptypes[tag] if what == "pair" then - local t = slookup[2] if t then slookup[2] = pack(t,true) end - local t = slookup[3] if t then slookup[3] = pack(t,true) end + local t = slookup[2] if t then slookup[2] = pack_indexed(t) end + local t = slookup[3] if t then slookup[3] = pack_indexed(t) end elseif what ~= "substitution" then - slookups[tag] = pack(slookup) + slookups[tag] = pack_indexed(slookup) -- true is new end end end @@ -175,12 +270,12 @@ local function packdata(data) if what == "pair" then for i=1,#mlookup do local lookup = mlookup[i] - local t = lookup[2] if t then lookup[2] = pack(t,true) end - local t = lookup[3] if t then lookup[3] = pack(t,true) end + local t = lookup[2] if t then lookup[2] = pack_indexed(t) end + 
local t = lookup[3] if t then lookup[3] = pack_indexed(t) end end elseif what ~= "substitution" then for i=1,#mlookup do - mlookup[i] = pack(mlookup[i]) -- true + mlookup[i] = pack_indexed(mlookup[i]) -- true is new end end end @@ -188,7 +283,7 @@ local function packdata(data) local kerns = description.kerns if kerns then for tag, kern in next, kerns do - kerns[tag] = pack(kern) + kerns[tag] = pack_flat(kern) end end local math = description.math @@ -196,7 +291,7 @@ local function packdata(data) local kerns = math.kerns if kerns then for tag, kern in next, kerns do - kerns[tag] = pack(kern) + kerns[tag] = pack_normal(kern) end end end @@ -206,12 +301,14 @@ local function packdata(data) if what == "baselig" then for _, a in next, anchor do for k=1,#a do - a[k] = pack(a[k]) +-- a[k] = pack_normal(a[k]) + a[k] = pack_indexed(a[k]) end end else for k, v in next, anchor do - anchor[k] = pack(v) +-- anchor[k] = pack_normal(v) + anchor[k] = pack_indexed(v) end end end @@ -224,11 +321,11 @@ local function packdata(data) if rules then for i=1,#rules do -- was next loop local rule = rules[i] - local r = rule.before if r then for i=1,#r do r[i] = pack(r[i]) end end - local r = rule.after if r then for i=1,#r do r[i] = pack(r[i]) end end - local r = rule.current if r then for i=1,#r do r[i] = pack(r[i]) end end - local r = rule.replacements if r then rule.replacements = pack(r) end - local r = rule.lookups if r then rule.lookups = pack(r) end + local r = rule.before if r then for i=1,#r do r[i] = pack_boolean(r[i]) end end + local r = rule.after if r then for i=1,#r do r[i] = pack_boolean(r[i]) end end + local r = rule.current if r then for i=1,#r do r[i] = pack_boolean(r[i]) end end + local r = rule.replacements if r then rule.replacements = pack_boolean(r) end + local r = rule.lookups if r then rule.lookups = pack_boolean(r) end end end end @@ -236,13 +333,13 @@ local function packdata(data) local anchor_to_lookup = resources.anchor_to_lookup if anchor_to_lookup then for 
anchor, lookup in next, anchor_to_lookup do - anchor_to_lookup[anchor] = pack(lookup) + anchor_to_lookup[anchor] = pack_normal(lookup) end end local lookup_to_anchor = resources.lookup_to_anchor if lookup_to_anchor then for lookup, anchor in next, lookup_to_anchor do - lookup_to_anchor[lookup] = pack(anchor) + lookup_to_anchor[lookup] = pack_normal(anchor) end end local sequences = resources.sequences @@ -250,16 +347,16 @@ local function packdata(data) for feature, sequence in next, sequences do local flags = sequence.flags if flags then - sequence.flags = pack(flags) + sequence.flags = pack_normal(flags) end local subtables = sequence.subtables if subtables then - sequence.subtables = pack(subtables) + sequence.subtables = pack_normal(subtables) end local features = sequence.features if features then for script, feature in next, features do - features[script] = pack(feature) + features[script] = pack_normal(feature) end end end @@ -269,11 +366,11 @@ local function packdata(data) for name, lookup in next, lookups do local flags = lookup.flags if flags then - lookup.flags = pack(flags) + lookup.flags = pack_normal(flags) end local subtables = lookup.subtables if subtables then - lookup.subtables = pack(subtables) + lookup.subtables = pack_normal(subtables) end end end @@ -283,7 +380,7 @@ local function packdata(data) local list = features[what] if list then for feature, spec in next, list do - list[feature] = pack(spec) + list[feature] = pack_normal(spec) end end end @@ -294,27 +391,30 @@ local function packdata(data) end if nt > 0 then for pass=1,2 do - local pack = (pass == 1 and pack_1) or pack_2 + if trace_packing then + report_otf("start packing: stage 2, pass %s",pass) + end + local pack_normal, pack_indexed, pack_flat, pack_boolean = packers(pass) for unicode, description in next, data.descriptions do local kerns = description.kerns if kerns then - description.kerns = pack(kerns) + description.kerns = pack_normal(kerns) end local math = description.math if 
math then local kerns = math.kerns if kerns then - math.kerns = pack(kerns) + math.kerns = pack_normal(kerns) end end local anchors = description.anchors if anchors then - description.anchors = pack(anchors) + description.anchors = pack_normal(anchors) end local mlookups = description.mlookups if mlookups then for tag, mlookup in next, mlookups do - mlookups[tag] = pack(mlookup) + mlookups[tag] = pack_normal(mlookup) end end end @@ -325,9 +425,9 @@ local function packdata(data) if rules then for i=1,#rules do -- was next loop local rule = rules[i] - local r = rule.before if r then rule.before = pack(r) end - local r = rule.after if r then rule.after = pack(r) end - local r = rule.current if r then rule.current = pack(r) end + local r = rule.before if r then rule.before = pack_normal(r) end + local r = rule.after if r then rule.after = pack_normal(r) end + local r = rule.current if r then rule.current = pack_normal(r) end end end end @@ -335,7 +435,7 @@ local function packdata(data) local sequences = resources.sequences if sequences then for feature, sequence in next, sequences do - sequence.features = pack(sequence.features) + sequence.features = pack_normal(sequence.features) end end if not success(2,pass) then @@ -344,15 +444,15 @@ local function packdata(data) end for pass=1,2 do - local pack = (pass == 1 and pack_1) or pack_2 + local pack_normal, pack_indexed, pack_flat, pack_boolean = packers(pass) for unicode, description in next, data.descriptions do local slookups = description.slookups if slookups then - description.slookups = pack(slookups) + description.slookups = pack_normal(slookups) end local mlookups = description.mlookups if mlookups then - description.mlookups = pack(mlookups) + description.mlookups = pack_normal(mlookups) end end end diff --git a/tex/context/base/font-syn.lua b/tex/context/base/font-syn.lua index 9be307099..3f90da91b 100644 --- a/tex/context/base/font-syn.lua +++ b/tex/context/base/font-syn.lua @@ -8,7 +8,6 @@ if not modules then 
modules = { } end modules ['font-syn'] = { -- todo: subs in lookups requests -local utf = unicode.utf8 local next, tonumber = next, tonumber local sub, gsub, lower, match, find, lower, upper = string.sub, string.gsub, string.lower, string.match, string.find, string.lower, string.upper local find, gmatch = string.find, string.gmatch diff --git a/tex/context/base/font-vf.lua b/tex/context/base/font-vf.lua index 01d5289f8..34d74d93f 100644 --- a/tex/context/base/font-vf.lua +++ b/tex/context/base/font-vf.lua @@ -7,10 +7,13 @@ if not modules then modules = { } end modules ['font-vf'] = { } --[[ldx-- -
This is very experimental code! Not yet adapted to recent -changes. This will change.
+This is very experimental code! Not yet adapted to recent changes. This will change.
--ldx]]-- +-- present in the backend but unspecified: +-- +-- vf.rule vf.special vf.right vf.push vf.down vf.char vf.node vf.fontid vf.pop vf.image vf.nop + local next = next local allocate = utilities.storage.allocate diff --git a/tex/context/base/l-dir.lua b/tex/context/base/l-dir.lua index 3deb660ce..0568bcfb5 100644 --- a/tex/context/base/l-dir.lua +++ b/tex/context/base/l-dir.lua @@ -8,7 +8,7 @@ if not modules then modules = { } end modules ['l-dir'] = { -- dir.expandname will be merged with cleanpath and collapsepath -local type = type +local type, select = type, select local find, gmatch, match, gsub = string.find, string.gmatch, string.match, string.gsub local concat, insert, remove = table.concat, table.insert, table.remove local lpegmatch = lpeg.match @@ -261,15 +261,15 @@ local onwindows = os.type == "windows" or find(os.getenv("PATH"),";") if onwindows then function dir.mkdirs(...) - local str, pth, t = "", "", { ... } - for i=1,#t do - local s = t[i] - if s ~= "" then - if str ~= "" then - str = str .. "/" .. s - else - str = s - end + local str, pth = "", "" + for i=1,select("#",...) do + local s = select(i,...) + if s == "" then + -- skip + elseif str == "" then + str = s + else + str = str .. "/" .. s end end local first, middle, last @@ -329,9 +329,9 @@ if onwindows then else function dir.mkdirs(...) - local str, pth, t = "", "", { ... } - for i=1,#t do - local s = t[i] + local str, pth = "", "" + for i=1,select("#",...) do + local s = select(i,...) if s and s ~= "" then -- we catch nil and false if str ~= "" then str = str .. "/" .. 
s diff --git a/tex/context/base/l-file.lua b/tex/context/base/l-file.lua index d1ec753b1..f34bed5fd 100644 --- a/tex/context/base/l-file.lua +++ b/tex/context/base/l-file.lua @@ -36,25 +36,25 @@ local suffix = period/"" * (1-period-slashes)^1 * -1 local pattern = C((noslashes^0 * slashes^1)^1) local function pathpart(name,default) - return lpegmatch(pattern,name) or default or "" + return name and lpegmatch(pattern,name) or default or "" end local pattern = (noslashes^0 * slashes)^1 * C(noslashes^1) * -1 local function basename(name) - return lpegmatch(pattern,name) or name + return name and lpegmatch(pattern,name) or name end local pattern = (noslashes^0 * slashes^1)^0 * Cs((1-suffix)^1) * suffix^0 local function nameonly(name) - return lpegmatch(pattern,name) or name + return name and lpegmatch(pattern,name) or name end local pattern = (noslashes^0 * slashes)^0 * (noperiod^1 * period)^1 * C(noperiod^1) * -1 local function suffixonly(name) - return lpegmatch(pattern,name) or "" + return name and lpegmatch(pattern,name) or "" end file.pathpart = pathpart @@ -85,7 +85,9 @@ local pattern_c = C(drive * path) * C(base * suffix) -- trick: two extra capture local pattern_d = path * rest function file.splitname(str,splitdrive) - if splitdrive then + if not str then + -- error + elseif splitdrive then return lpegmatch(pattern_a,str) -- returns drive, path, base, suffix else return lpegmatch(pattern_b,str) -- returns path, base, suffix @@ -93,34 +95,36 @@ function file.splitname(str,splitdrive) end function file.splitbase(str) - return lpegmatch(pattern_d,str) -- returns path, base+suffix + return str and lpegmatch(pattern_d,str) -- returns path, base+suffix end function file.nametotable(str,splitdrive) -- returns table - local path, drive, subpath, name, base, suffix = lpegmatch(pattern_c,str) - if splitdrive then - return { - path = path, - drive = drive, - subpath = subpath, - name = name, - base = base, - suffix = suffix, - } - else - return { - path = path, - name = 
name, - base = base, - suffix = suffix, - } + if str then + local path, drive, subpath, name, base, suffix = lpegmatch(pattern_c,str) + if splitdrive then + return { + path = path, + drive = drive, + subpath = subpath, + name = name, + base = base, + suffix = suffix, + } + else + return { + path = path, + name = name, + base = base, + suffix = suffix, + } + end end end local pattern = Cs(((period * noperiod^1 * -1)/"" + 1)^1) function file.removesuffix(name) - return lpegmatch(pattern,name) + return name and lpegmatch(pattern,name) end -- local pattern = (noslashes^0 * slashes)^0 * (noperiod^1 * period)^1 * Cp() * noperiod^1 * -1 @@ -137,8 +141,8 @@ end local suffix = period/"" * (1-period-slashes)^1 * -1 local pattern = Cs((noslashes^0 * slashes^1)^0 * ((1-suffix)^1)) * Cs(suffix) -function file.addsuffix(filename, suffix, criterium) - if not suffix or suffix == "" then +function file.addsuffix(filename,suffix,criterium) + if not filename or not suffix or suffix == "" then return filename elseif criterium == true then return filename .. "." .. suffix @@ -184,7 +188,7 @@ local suffix = period * (1-period-slashes)^1 * -1 local pattern = Cs((1-suffix)^0) function file.replacesuffix(name,suffix) - if suffix and suffix ~= "" then + if name and suffix and suffix ~= "" then return lpegmatch(pattern,name) .. "." .. suffix else return name @@ -193,10 +197,10 @@ end -- -local reslasher = lpeg.replacer(S("\\"),"/") +local reslasher = lpeg.replacer(P("\\"),"/") function file.reslash(str) - return lpegmatch(reslasher,str) + return str and lpegmatch(reslasher,str) end -- We should be able to use: @@ -212,7 +216,9 @@ end -- variant: function file.is_writable(name) - if lfs.isdir(name) then + if not name then + -- error + elseif lfs.isdir(name) then name = name .. 
"/m_t_x_t_e_s_t.tmp" local f = io.open(name,"wb") if f then @@ -240,24 +246,32 @@ end local readable = P("r") * Cc(true) function file.is_readable(name) - local a = attributes(name) - return a and lpegmatch(readable,a.permissions) or false + if name then + local a = attributes(name) + return a and lpegmatch(readable,a.permissions) or false + else + return false + end end file.isreadable = file.is_readable -- depricated file.iswritable = file.is_writable -- depricated function file.size(name) - local a = attributes(name) - return a and a.size or 0 + if name then + local a = attributes(name) + return a and a.size or 0 + else + return 0 + end end function file.splitpath(str,separator) -- string .. reslash is a bonus (we could do a direct split) - return checkedsplit(lpegmatch(reslasher,str),separator or io.pathseparator) + return str and checkedsplit(lpegmatch(reslasher,str),separator or io.pathseparator) end function file.joinpath(tab,separator) -- table - return concat(tab,separator or io.pathseparator) -- can have trailing // + return tab and concat(tab,separator or io.pathseparator) -- can have trailing // end local stripper = Cs(P(fwslash)^0/"" * reslasher) @@ -265,14 +279,23 @@ local isnetwork = fwslash * fwslash * (1-fwslash) + (1-fwslash-colon)^1 * colon local isroot = fwslash^1 * -1 local hasroot = fwslash^1 -function file.join(...) -- rather dirty +local deslasher = lpeg.replacer(S("\\/")^1,"/") + +-- If we have a network or prefix then there is a chance that we end up with two +-- // in the middle ... we could prevent this if we (1) expand prefixes: and (2) +-- split and rebuild as url. Of course we could assume no network paths (which +-- makes sense) and assume either mapped drives (windows) or mounts (unix) but +-- then we still have to deal with urls ... anyhow, multiple // are never a real +-- problem but just ugly. + +function file.join(...) local lst = { ... 
} local one = lst[1] if lpegmatch(isnetwork,one) then - local two = lpegmatch(reslasher,concat(lst,"/",2)) + local two = lpegmatch(deslasher,concat(lst,"/",2)) return one .. "/" .. two elseif lpegmatch(isroot,one) then - local two = lpegmatch(reslasher,concat(lst,"/",2)) + local two = lpegmatch(deslasher,concat(lst,"/",2)) if lpegmatch(hasroot,two) then return two else @@ -281,7 +304,7 @@ function file.join(...) -- rather dirty elseif one == "" then return lpegmatch(stripper,concat(lst,"/",2)) else - return lpegmatch(reslasher,concat(lst,"/")) + return lpegmatch(deslasher,concat(lst,"/")) end end @@ -310,6 +333,9 @@ local splitstarter = (Cs(drivespec * (bwslash/"/" + fwslash)^0) + Cc(false)) * C local absolute = fwslash function file.collapsepath(str,anchor) + if not str then + return + end if anchor and not lpegmatch(anchors,str) then str = getcurrentdir() .. "/" .. str end @@ -319,7 +345,6 @@ function file.collapsepath(str,anchor) return lpegmatch(reslasher,str) end local starter, oldelements = lpegmatch(splitstarter,str) --- inspect(oldelements) local newelements = { } local i = #oldelements while i > 0 do @@ -373,11 +398,13 @@ local whatever = P("-")^0 / "" local pattern_b = Cs(whatever * (1 - whatever * -1)^1) function file.robustname(str,strict) - str = lpegmatch(pattern_a,str) or str - if strict then - return lpegmatch(pattern_b,str) or str -- two step is cleaner (less backtracking) - else - return str + if str then + str = lpegmatch(pattern_a,str) or str + if strict then + return lpegmatch(pattern_b,str) or str -- two step is cleaner (less backtracking) + else + return str + end end end @@ -385,7 +412,9 @@ file.readdata = io.loaddata file.savedata = io.savedata function file.copy(oldname,newname) - file.savedata(newname,io.loaddata(oldname)) + if oldname and newname then + file.savedata(newname,io.loaddata(oldname)) + end end -- also rewrite previous @@ -406,11 +435,11 @@ lpeg.patterns.rootbased = rootbased -- ./name ../name /name c: :// name/name function 
file.is_qualified_path(filename) - return lpegmatch(qualified,filename) ~= nil + return filename and lpegmatch(qualified,filename) ~= nil end function file.is_rootbased_path(filename) - return lpegmatch(rootbased,filename) ~= nil + return filename and lpegmatch(rootbased,filename) ~= nil end -- function test(t) for k, v in next, t do print(v, "=>", file.splitname(v)) end end @@ -432,8 +461,10 @@ end -- for myself: function file.strip(name,dir) - local b, a = match(name,"^(.-)" .. dir .. "(.*)$") - return a ~= "" and a or name + if name then + local b, a = match(name,"^(.-)" .. dir .. "(.*)$") + return a ~= "" and a or name + end end -- local debuglist = { diff --git a/tex/context/base/l-io.lua b/tex/context/base/l-io.lua index ec628b5e0..e7bc23642 100644 --- a/tex/context/base/l-io.lua +++ b/tex/context/base/l-io.lua @@ -328,7 +328,7 @@ function io.readstring(f,n,m) f:seek("set",n) n = m end - local str = gsub(f:read(n),"%z","") + local str = gsub(f:read(n),"\000","") return str end diff --git a/tex/context/base/l-lpeg.lua b/tex/context/base/l-lpeg.lua index a5fdec765..b00d02f9f 100644 --- a/tex/context/base/l-lpeg.lua +++ b/tex/context/base/l-lpeg.lua @@ -6,9 +6,11 @@ if not modules then modules = { } end modules ['l-lpeg'] = { license = "see context related readme files" } - -- a new lpeg fails on a #(1-P(":")) test and really needs a + P(-1) +-- move utf -> l-unicode +-- move string -> l-string or keep it here + local lpeg = require("lpeg") -- tracing (only used when we encounter a problem in integration of lpeg in luatex) @@ -60,12 +62,9 @@ local byte, char, gmatch, format = string.byte, string.char, string.gmatch, stri lpeg.patterns = lpeg.patterns or { } -- so that we can share local patterns = lpeg.patterns -local P, R, S, V, Ct, C, Cs, Cc, Cp = lpeg.P, lpeg.R, lpeg.S, lpeg.V, lpeg.Ct, lpeg.C, lpeg.Cs, lpeg.Cc, lpeg.Cp +local P, R, S, V, Ct, C, Cs, Cc, Cp, Cmt = lpeg.P, lpeg.R, lpeg.S, lpeg.V, lpeg.Ct, lpeg.C, lpeg.Cs, lpeg.Cc, lpeg.Cp, lpeg.Cmt local 
lpegtype, lpegmatch = lpeg.type, lpeg.match -local utfcharacters = string.utfcharacters -local utfgmatch = unicode and unicode.utf8.gmatch - local anything = P(1) local endofstring = P(-1) local alwaysmatched = P(true) @@ -91,9 +90,12 @@ local utfbom_8 = P('\239\187\191') local utfbom = utfbom_32_be + utfbom_32_le + utfbom_16_be + utfbom_16_le + utfbom_8 -local utftype = utfbom_32_be / "utf-32-be" + utfbom_32_le / "utf-32-le" - + utfbom_16_be / "utf-16-be" + utfbom_16_le / "utf-16-le" - + utfbom_8 / "utf-8" + alwaysmatched / "unknown" +local utftype = utfbom_32_be * Cc("utf-32-be") + utfbom_32_le * Cc("utf-32-le") + + utfbom_16_be * Cc("utf-16-be") + utfbom_16_le * Cc("utf-16-le") + + utfbom_8 * Cc("utf-8") + alwaysmatched * Cc("utf-8") -- assume utf8 +local utfoffset = utfbom_32_be * Cc(4) + utfbom_32_le * Cc(4) + + utfbom_16_be * Cc(2) + utfbom_16_le * Cc(2) + + utfbom_8 * Cc(3) + Cc(0) local utf8next = R("\128\191") @@ -103,6 +105,7 @@ patterns.utf8three = R("\224\239") * utf8next * utf8next patterns.utf8four = R("\240\244") * utf8next * utf8next * utf8next patterns.utfbom = utfbom patterns.utftype = utftype +patterns.utfoffset = utfoffset local utf8char = patterns.utf8one + patterns.utf8two + patterns.utf8three + patterns.utf8four local validutf8char = utf8char^0 * endofstring * Cc(true) + Cc(false) @@ -115,6 +118,22 @@ patterns.validutf8char = validutf8char local eol = S("\n\r") local spacer = S(" \t\f\v") -- + char(0xc2, 0xa0) if we want utf (cf mail roberto) local whitespace = eol + spacer +local nonspacer = 1 - spacer +local nonwhitespace = 1 - whitespace + +patterns.eol = eol +patterns.spacer = spacer +patterns.whitespace = whitespace +patterns.nonspacer = nonspacer +patterns.nonwhitespace = nonwhitespace + +local stripper = spacer^0 * C((spacer^0 * nonspacer^1)^0) -- from example by roberto + +----- collapser = Cs(spacer^0/"" * ((spacer^1 * P(-1) / "") + (spacer^1/" ") + P(1))^0) +local collapser = Cs(spacer^0/"" * nonspacer^0 * ((spacer^0/" " * 
nonspacer^1)^0)) + +patterns.stripper = stripper +patterns.collapser = collapser patterns.digit = digit patterns.sign = sign @@ -137,13 +156,8 @@ patterns.letter = patterns.lowercase + patterns.uppercase patterns.space = space patterns.tab = P("\t") patterns.spaceortab = patterns.space + patterns.tab -patterns.eol = eol -patterns.spacer = spacer -patterns.whitespace = whitespace patterns.newline = newline patterns.emptyline = newline^1 -patterns.nonspacer = 1 - spacer -patterns.nonwhitespace = 1 - whitespace patterns.equal = P("=") patterns.comma = P(",") patterns.commaspacer = P(",") * spacer^0 @@ -156,8 +170,8 @@ patterns.squote = squote patterns.dquote = dquote patterns.nosquote = (escaped + (1-squote))^0 patterns.nodquote = (escaped + (1-dquote))^0 -patterns.unsingle = (squote/"") * patterns.nosquote * (squote/"") -patterns.undouble = (dquote/"") * patterns.nodquote * (dquote/"") +patterns.unsingle = (squote/"") * patterns.nosquote * (squote/"") -- will change to C in the middle +patterns.undouble = (dquote/"") * patterns.nodquote * (dquote/"") -- will change to C in the middle patterns.unquoted = patterns.undouble + patterns.unsingle -- more often undouble patterns.unspacer = ((patterns.spacer^1)/"")^0 @@ -165,16 +179,11 @@ patterns.singlequoted = squote * patterns.nosquote * squote patterns.doublequoted = dquote * patterns.nodquote * dquote patterns.quoted = patterns.doublequoted + patterns.singlequoted +patterns.propername = R("AZ","az","__") * R("09","AZ","az", "__")^0 * P(-1) + patterns.somecontent = (anything - newline - space)^1 -- (utf8char - newline - space)^1 patterns.beginline = #(1-newline) --- print(string.unquoted("test")) --- print(string.unquoted([["t\"est"]])) --- print(string.unquoted([["t\"est"x]])) --- print(string.unquoted("\'test\'")) --- print(string.unquoted('"test"')) --- print(string.unquoted('"test"')) - local function anywhere(pattern) --slightly adapted from website return P { P(pattern) + 1 * V(1) } end @@ -237,10 +246,10 @@ 
function string.splitup(str,separator) return lpegmatch(splitters_m[separator] or splitat(separator),str) end ---~ local p = splitat("->",false) print(lpegmatch(p,"oeps->what->more")) -- oeps what more ---~ local p = splitat("->",true) print(lpegmatch(p,"oeps->what->more")) -- oeps what->more ---~ local p = splitat("->",false) print(lpegmatch(p,"oeps")) -- oeps ---~ local p = splitat("->",true) print(lpegmatch(p,"oeps")) -- oeps +-- local p = splitat("->",false) print(lpegmatch(p,"oeps->what->more")) -- oeps what more +-- local p = splitat("->",true) print(lpegmatch(p,"oeps->what->more")) -- oeps what->more +-- local p = splitat("->",false) print(lpegmatch(p,"oeps")) -- oeps +-- local p = splitat("->",true) print(lpegmatch(p,"oeps")) -- oeps local cache = { } @@ -273,12 +282,6 @@ local content = (empty + nonempty)^1 patterns.textline = content ---~ local linesplitter = Ct(content^0) ---~ ---~ function string.splitlines(str) ---~ return lpegmatch(linesplitter,str) ---~ end - local linesplitter = tsplitat(newline) patterns.linesplitter = linesplitter @@ -287,66 +290,7 @@ function string.splitlines(str) return lpegmatch(linesplitter,str) end -local utflinesplitter = utfbom^-1 * tsplitat(newline) - -patterns.utflinesplitter = utflinesplitter - -function string.utfsplitlines(str) - return lpegmatch(utflinesplitter,str or "") -end - -local utfcharsplitter_ows = utfbom^-1 * Ct(C(utf8char)^0) -local utfcharsplitter_iws = utfbom^-1 * Ct((whitespace^1 + C(utf8char))^0) - -function string.utfsplit(str,ignorewhitespace) -- new - if ignorewhitespace then - return lpegmatch(utfcharsplitter_iws,str or "") - else - return lpegmatch(utfcharsplitter_ows,str or "") - end -end - --- inspect(string.utfsplit("a b c d")) --- inspect(string.utfsplit("a b c d",true)) - --- -- alternative 1: 0.77 --- --- local utfcharcounter = utfbom^-1 * Cs((utf8char/'!')^0) --- --- function string.utflength(str) --- return #lpegmatch(utfcharcounter,str or "") --- end --- --- -- alternative 2: 1.70 --- --- 
local n = 0 --- --- local utfcharcounter = utfbom^-1 * (utf8char/function() n = n + 1 end)^0 -- slow --- --- function string.utflength(str) --- n = 0 --- lpegmatch(utfcharcounter,str or "") --- return n --- end --- --- -- alternative 3: 0.24 (native unicode.utf8.len: 0.047) - -local n = 0 - -local utfcharcounter = utfbom^-1 * Cs ( ( - Cp() * (lpeg.patterns.utf8one )^1 * Cp() / function(f,t) n = n + t - f end - + Cp() * (lpeg.patterns.utf8two )^1 * Cp() / function(f,t) n = n + (t - f)/2 end - + Cp() * (lpeg.patterns.utf8three)^1 * Cp() / function(f,t) n = n + (t - f)/3 end - + Cp() * (lpeg.patterns.utf8four )^1 * Cp() / function(f,t) n = n + (t - f)/4 end -)^0 ) - -function string.utflength(str) - n = 0 - lpegmatch(utfcharcounter,str or "") - return n -end - ---~ lpeg.splitters = cache -- no longer public +-- lpeg.splitters = cache -- no longer public local cache = { } @@ -372,7 +316,7 @@ function string.checkedsplit(str,separator) return lpegmatch(c,str) end ---~ from roberto's site: +-- from roberto's site: local function f2(s) local c1, c2 = byte(s,1,2) return c1 * 64 + c2 - 12416 end local function f3(s) local c1, c2, c3 = byte(s,1,3) return (c1 * 64 + c2) * 64 + c3 - 925824 end @@ -430,8 +374,11 @@ end -- Just for fun I looked at the used bytecode and -- p = (p and p + pp) or pp gets one more (testset). 
-function lpeg.replacer(one,two,makefunction) +-- todo: cache when string + +function lpeg.replacer(one,two,makefunction,isutf) -- in principle we should sort the keys local pattern + local u = isutf and utf8char or 1 if type(one) == "table" then local no = #one local p = P(false) @@ -439,24 +386,21 @@ function lpeg.replacer(one,two,makefunction) for k, v in next, one do p = p + P(k) / v end - pattern = Cs((p + 1)^0) + pattern = Cs((p + u)^0) elseif no == 1 then local o = one[1] one, two = P(o[1]), o[2] -- pattern = Cs(((1-one)^1 + one/two)^0) - pattern = Cs((one/two + 1)^0) + pattern = Cs((one/two + u)^0) else for i=1,no do local o = one[i] p = p + P(o[1]) / o[2] end - pattern = Cs((p + 1)^0) + pattern = Cs((p + u)^0) end else - one = P(one) - two = two or "" - -- pattern = Cs(((1-one)^1 + one/two)^0) - pattern = Cs((one/two +1)^0) + pattern = Cs((P(one)/(two or "") + u)^0) end if makefunction then return function(str) @@ -470,14 +414,20 @@ end function lpeg.finder(lst,makefunction) local pattern if type(lst) == "table" then - local p = P(false) - for i=1,#lst do - p = p + P(lst[i]) + pattern = P(false) + if #lst == 0 then + for k, v in next, lst do + pattern = pattern + P(k) -- ignore key, so we can use a replacer table + end + else + for i=1,#lst do + pattern = pattern + P(lst[i]) + end end - pattern = (p + 1)^0 else - pattern = (P(lst) + 1)^0 + pattern = P(lst) end + pattern = (1-pattern)^0 * pattern if makefunction then return function(str) return lpegmatch(pattern,str) @@ -518,21 +468,21 @@ function lpeg.balancer(left,right) return P { left * ((1 - left - right) + V(1))^0 * right } end ---~ print(1,lpegmatch(lpeg.firstofsplit(":"),"bc:de")) ---~ print(2,lpegmatch(lpeg.firstofsplit(":"),":de")) -- empty ---~ print(3,lpegmatch(lpeg.firstofsplit(":"),"bc")) ---~ print(4,lpegmatch(lpeg.secondofsplit(":"),"bc:de")) ---~ print(5,lpegmatch(lpeg.secondofsplit(":"),"bc:")) -- empty ---~ print(6,lpegmatch(lpeg.secondofsplit(":",""),"bc")) ---~ 
print(7,lpegmatch(lpeg.secondofsplit(":"),"bc")) ---~ print(9,lpegmatch(lpeg.secondofsplit(":","123"),"bc")) - ---~ -- slower: ---~ ---~ function lpeg.counter(pattern) ---~ local n, pattern = 0, (lpeg.P(pattern)/function() n = n + 1 end + lpeg.anything)^0 ---~ return function(str) n = 0 ; lpegmatch(pattern,str) ; return n end ---~ end +-- print(1,lpegmatch(lpeg.firstofsplit(":"),"bc:de")) +-- print(2,lpegmatch(lpeg.firstofsplit(":"),":de")) -- empty +-- print(3,lpegmatch(lpeg.firstofsplit(":"),"bc")) +-- print(4,lpegmatch(lpeg.secondofsplit(":"),"bc:de")) +-- print(5,lpegmatch(lpeg.secondofsplit(":"),"bc:")) -- empty +-- print(6,lpegmatch(lpeg.secondofsplit(":",""),"bc")) +-- print(7,lpegmatch(lpeg.secondofsplit(":"),"bc")) +-- print(9,lpegmatch(lpeg.secondofsplit(":","123"),"bc")) + +-- -- slower: +-- +-- function lpeg.counter(pattern) +-- local n, pattern = 0, (lpeg.P(pattern)/function() n = n + 1 end + lpeg.anything)^0 +-- return function(str) n = 0 ; lpegmatch(pattern,str) ; return n end +-- end local nany = utf8char/"" @@ -543,65 +493,12 @@ function lpeg.counter(pattern) end end -if utfgmatch then - - function lpeg.count(str,what) -- replaces string.count - if type(what) == "string" then - local n = 0 - for _ in utfgmatch(str,what) do - n = n + 1 - end - return n - else -- 4 times slower but still faster than / function - return #lpegmatch(Cs((P(what)/" " + nany)^0),str) - end - end - -else - - local cache = { } - - function lpeg.count(str,what) -- replaces string.count - if type(what) == "string" then - local p = cache[what] - if not p then - p = Cs((P(what)/" " + nany)^0) - cache[p] = p - end - return #lpegmatch(p,str) - else -- 4 times slower but still faster than / function - return #lpegmatch(Cs((P(what)/" " + nany)^0),str) - end - end - -end - -local patterns_escapes = { -- also defines in l-string - ["%"] = "%%", - ["."] = "%.", - ["+"] = "%+", ["-"] = "%-", ["*"] = "%*", - ["["] = "%[", ["]"] = "%]", - ["("] = "%)", [")"] = "%)", - -- ["{"] = "%{", 
["}"] = "%}" - -- ["^"] = "%^", ["$"] = "%$", -} - -local simple_escapes = { -- also defines in l-string - ["-"] = "%-", - ["."] = "%.", - ["?"] = ".", - ["*"] = ".*", -} - -local p = Cs((S("-.+*%()[]") / patterns_escapes + anything)^0) -local s = Cs((S("-.+*%()[]") / simple_escapes + anything)^0) - -function string.escapedpattern(str,simple) - return lpegmatch(simple and s or p,str) -end - -- utf extensies +local utfcharacters = utf and utf.characters or string.utfcharacters +local utfgmatch = unicode and unicode.utf8.gmatch +local utfchar = utf and utf.char or (unicode and unicode.utf8 and unicode.utf8.char) + lpeg.UP = lpeg.P if utfcharacters then @@ -640,8 +537,6 @@ end local range = utf8byte * utf8byte + Cc(false) -- utf8byte is already a capture -local utfchar = unicode and unicode.utf8 and unicode.utf8.char - function lpeg.UR(str,more) local first, last if type(str) == "number" then @@ -672,16 +567,16 @@ end -- print(lpeg.match(lpeg.Cs((C(lpeg.UR("αω"))/{ ["χ"] = "OEPS" })^0),"αωχαω")) ---~ lpeg.print(lpeg.R("ab","cd","gh")) ---~ lpeg.print(lpeg.P("a","b","c")) ---~ lpeg.print(lpeg.S("a","b","c")) +-- lpeg.print(lpeg.R("ab","cd","gh")) +-- lpeg.print(lpeg.P("a","b","c")) +-- lpeg.print(lpeg.S("a","b","c")) ---~ print(lpeg.count("äáàa",lpeg.P("á") + lpeg.P("à"))) ---~ print(lpeg.count("äáàa",lpeg.UP("áà"))) ---~ print(lpeg.count("äáàa",lpeg.US("àá"))) ---~ print(lpeg.count("äáàa",lpeg.UR("aá"))) ---~ print(lpeg.count("äáàa",lpeg.UR("àá"))) ---~ print(lpeg.count("äáàa",lpeg.UR(0x0000,0xFFFF))) +-- print(lpeg.count("äáàa",lpeg.P("á") + lpeg.P("à"))) +-- print(lpeg.count("äáàa",lpeg.UP("áà"))) +-- print(lpeg.count("äáàa",lpeg.US("àá"))) +-- print(lpeg.count("äáàa",lpeg.UR("aá"))) +-- print(lpeg.count("äáàa",lpeg.UR("àá"))) +-- print(lpeg.count("äáàa",lpeg.UR(0x0000,0xFFFF))) function lpeg.is_lpeg(p) return p and lpegtype(p) == "pattern" @@ -703,12 +598,30 @@ end -- have the longest keyword first, so 'aaa' comes beforte 'aa' which is why we -- loop back from the 
end cq. prepend. -local sort, fastcopy, sortedkeys = table.sort, table.fastcopy, table.sortedkeys -- dependency! +local sort = table.sort + +local function copyindexed(old) + local new = { } + for i=1,#old do + new[i] = old[i] -- copy the element, not a reference to the whole table + end + return new +end + +local function sortedkeys(tab) + local keys, s = { }, 0 + for key,_ in next, tab do + s = s + 1 + keys[s] = key + end + sort(keys) + return keys +end function lpeg.append(list,pp,delayed,checked) local p = pp if #list > 0 then - local keys = fastcopy(list) + local keys = copyindexed(list) sort(keys) for i=#keys,1,-1 do local k = keys[i] @@ -805,8 +718,10 @@ end local function make(t) local p --- for k, v in next, t do - for k, v in table.sortedhash(t) do + local keys = sortedkeys(t) + for i=1,#keys do + local k = keys[i] + local v = t[k] if not p then if next(v) then p = P(k) * make(v) @@ -824,7 +739,7 @@ local function make(t) return p end -function lpeg.utfchartabletopattern(list) +function lpeg.utfchartabletopattern(list) -- goes to util-lpg local tree = { } for i=1,#list do local t = tree @@ -856,20 +771,8 @@ end -- utfchar(0x205F), -- math thinspace -- } ) --- handy from within tex: - -local lpegmatch = lpeg.match - -local replacer = lpeg.replacer("@","%%") -- Watch the escaped % in lpeg! - -function string.tformat(fmt,...) - return format(lpegmatch(replacer,fmt),...) 
-end - --- strips leading and trailing spaces and collapsed all other spaces - -local pattern = Cs(whitespace^0/"" * ((whitespace^1 * P(-1) / "") + (whitespace^1/" ") + P(1))^0) +-- a few handy ones: +-- +-- faster than find(str,"[\n\r]") when match and # > 7 and always faster when # > 3 -function string.collapsespaces(str) - return lpegmatch(pattern,str) -end +patterns.containseol = lpeg.finder(eol) -- (1-eol)^0 * eol diff --git a/tex/context/base/l-lua.lua b/tex/context/base/l-lua.lua new file mode 100644 index 000000000..8ac351417 --- /dev/null +++ b/tex/context/base/l-lua.lua @@ -0,0 +1,107 @@ +if not modules then modules = { } end modules ['l-lua'] = { + version = 1.001, + comment = "companion to luat-lib.mkiv", + author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", + copyright = "PRAGMA ADE / ConTeXt Development Team", + license = "see context related readme files" +} + +-- compatibility hacks ... try to avoid usage + +local major, minor = string.match(_VERSION,"^[^%d]+(%d+)%.(%d+).*$") + +_MAJORVERSION = tonumber(major) or 5 +_MINORVERSION = tonumber(minor) or 1 + +-- basics: + +if loadstring then + + local loadnormal = load + + function load(first,...) + if type(first) == "string" then + return loadstring(first,...) + else + return loadnormal(first,...) + end + end + +else + + loadstring = load + +end + +-- table: + +-- Starting with version 5.2 Lua no longer provide ipairs, which makes +-- sense. As we already used the for loop and # in most places the +-- impact on ConTeXt was not that large; the remaining ipairs already +-- have been replaced. In a similar fashion we also hardly used pairs. +-- +-- Hm, actually ipairs was retained, but we no longer use it anyway. +-- +-- Just in case, we provide the fallbacks as discussed in Programming +-- in Lua (http://www.lua.org/pil/7.3.html): + +if not ipairs then + + -- for k, v in ipairs(t) do ... end + -- for k=1,#t do local v = t[k] ... 
end + + local function iterate(a,i) + i = i + 1 + local v = a[i] + if v ~= nil then + return i, v --, nil + end + end + + function ipairs(a) + return iterate, a, 0 + end + +end + +if not pairs then + + -- for k, v in pairs(t) do ... end + -- for k, v in next, t do ... end + + function pairs(t) + return next, t -- , nil + end + +end + +-- The unpack function has been moved to the table table, and for compatibility +-- reasons we provide both now. + +if not table.unpack then + + table.unpack = _G.unpack + +elseif not unpack then + + _G.unpack = table.unpack + +end + +-- package: + +-- if not package.searchers then +-- +-- package.searchers = package.loaders -- 5.2 +-- +-- elseif not package.loaders then +-- +-- package.loaders = package.searchers +-- +-- end + +if not package.loaders then -- brr, searchers is a special "loadlib function" userdata type + + package.loaders = package.searchers + +end diff --git a/tex/context/base/l-number.lua b/tex/context/base/l-number.lua index a4dbe3bdf..f974f2582 100644 --- a/tex/context/base/l-number.lua +++ b/tex/context/base/l-number.lua @@ -16,10 +16,129 @@ local lpegmatch = lpeg.match number = number or { } local number = number --- a,b,c,d,e,f = number.toset(100101) +if bit32 then + + local btest, bor = bit32.btest, bit32.bor + + function number.bit(p) + return 2 ^ (p - 1) -- 1-based indexing + end + + number.hasbit = btest + number.setbit = bor + + function number.setbit(x,p) + return btest(x,p) and x or x + p + end + + function number.clearbit(x,p) + return btest(x,p) and x - p or x + end + +else + + -- http://ricilake.blogspot.com/2007/10/iterating-bits-in-lua.html + + function number.bit(p) + return 2 ^ (p - 1) -- 1-based indexing + end + + function number.hasbit(x, p) -- typical call: if hasbit(x, bit(3)) then ... 
+ return x % (p + p) >= p + end + + function number.setbit(x, p) + return (x % (p + p) >= p) and x or x + p + end + + function number.clearbit(x, p) + return (x % (p + p) >= p) and x - p or x + end -function number.toset(n) - return match(tostring(n),"(.?)(.?)(.?)(.?)(.?)(.?)(.?)(.?)") +end + +-- print(number.tobitstring(8)) +-- print(number.tobitstring(14)) +-- print(number.tobitstring(66)) +-- print(number.tobitstring(0x00)) +-- print(number.tobitstring(0xFF)) +-- print(number.tobitstring(46260767936,4)) + +if bit32 then + + local bextract = bit32.extract + + local t = { + "0", "0", "0", "0", "0", "0", "0", "0", + "0", "0", "0", "0", "0", "0", "0", "0", + "0", "0", "0", "0", "0", "0", "0", "0", + "0", "0", "0", "0", "0", "0", "0", "0", + } + + function number.tobitstring(b,m) + -- if really needed we can speed this one up + -- because small numbers need less extraction + local n = 32 + for i=0,31 do + local v = bextract(b,i) + local k = 32 - i + if v == 1 then + n = k + t[k] = "1" + else + t[k] = "0" + end + end + if m then + m = 33 - m * 8 + if m < 1 then + m = 1 + end + return concat(t,"",m) + elseif n <= 8 then + return concat(t) + elseif n <= 16 then + return concat(t,"",9) + elseif n <= 24 then + return concat(t,"",17) + else + return concat(t,"",25) + end + end + +else + + function number.tobitstring(n,m) + if n > 0 then + local t = { } + while n > 0 do + insert(t,1,n % 2 > 0 and 1 or 0) + n = floor(n/2) + end + local nn = 8 - #t % 8 + if nn > 0 and nn < 8 then + for i=1,nn do + insert(t,1,0) + end + end + if m then + m = m * 8 - #t + if m > 0 then + insert(t,1,rep("0",m)) + end + end + return concat(t) + elseif m then + return rep("00000000",m) -- m zero bytes + else + return "00000000" + end + end + +end + +function number.valid(str,default) + return tonumber(str) or default or nil end function number.toevenhex(n) @@ -31,104 +150,57 @@ function number.toevenhex(n) end end --- the lpeg way is slower on 8 digits, but faster on 4 digits, some 7.5% --- on +-- a,b,c,d,e,f = 
number.toset(100101) +-- +-- function number.toset(n) +-- return match(tostring(n),"(.?)(.?)(.?)(.?)(.?)(.?)(.?)(.?)") +-- end +-- +-- -- the lpeg way is slower on 8 digits, but faster on 4 digits, some 7.5% +-- -- on -- -- for i=1,1000000 do -- local a,b,c,d,e,f,g,h = number.toset(12345678) -- local a,b,c,d = number.toset(1234) -- local a,b,c = number.toset(123) +-- local a,b,c = number.toset("123") -- end --- --- of course dedicated "(.)(.)(.)(.)" matches are even faster -local one = lpeg.C(1-lpeg.S(''))^1 +local one = lpeg.C(1-lpeg.S('')/tonumber)^1 function number.toset(n) return lpegmatch(one,tostring(n)) end -function number.bits(n,zero) - local t, i = { }, (zero and 0) or 1 - while n > 0 do +-- function number.bits(n,zero) +-- local t, i = { }, (zero and 0) or 1 +-- while n > 0 do +-- local m = n % 2 +-- if m > 0 then +-- insert(t,1,i) +-- end +-- n = floor(n/2) +-- i = i + 1 +-- end +-- return t +-- end +-- +-- -- a bit faster + +local function bits(n,i,...) + if n > 0 then local m = n % 2 + local n = floor(n/2) if m > 0 then - insert(t,1,i) - end - n = floor(n/2) - i = i + 1 - end - return t -end - ---~ http://ricilake.blogspot.com/2007/10/iterating-bits-in-lua.html - -function number.bit(p) - return 2 ^ (p - 1) -- 1-based indexing -end - -function number.hasbit(x, p) -- typical call: if hasbit(x, bit(3)) then ... - return x % (p + p) >= p -end - -function number.setbit(x, p) - return (x % (p + p) >= p) and x or x + p -end - -function number.clearbit(x, p) - return (x % (p + p) >= p) and x - p or x -end - ---~ function number.tobitstring(n) ---~ if n == 0 then ---~ return "0" ---~ else ---~ local t = { } ---~ while n > 0 do ---~ insert(t,1,n % 2 > 0 and 1 or 0) ---~ n = floor(n/2) ---~ end ---~ return concat(t) ---~ end ---~ end - -function number.tobitstring(n,m) - if n == 0 then - if m then - rep("00000000",m) + return bits(n, i+1, i, ...) else - return "00000000" + return bits(n, i+1, ...) 
end else - local t = { } - while n > 0 do - insert(t,1,n % 2 > 0 and 1 or 0) - n = floor(n/2) - end - local nn = 8 - #t % 8 - if nn > 0 and nn < 8 then - for i=1,nn do - insert(t,1,0) - end - end - if m then - m = m * 8 - #t - if m > 0 then - insert(t,1,rep("0",m)) - end - end - return concat(t) + return ... end end ---~ print(number.tobitstring(8)) ---~ print(number.tobitstring(14)) ---~ print(number.tobitstring(66)) ---~ print(number.tobitstring(0x00)) ---~ print(number.tobitstring(0xFF)) ---~ print(number.tobitstring(46260767936,8)) ---~ print(#number.tobitstring(46260767936,6)) - -function number.valid(str,default) - return tonumber(str) or default or nil +function number.bits(n) + return { bits(n,1) } end diff --git a/tex/context/base/l-string.lua b/tex/context/base/l-string.lua index 857acb019..597ce3462 100644 --- a/tex/context/base/l-string.lua +++ b/tex/context/base/l-string.lua @@ -7,40 +7,46 @@ if not modules then modules = { } end modules ['l-string'] = { } local string = string -local sub, gsub, find, match, gmatch, format, char, byte, rep, lower = string.sub, string.gsub, string.find, string.match, string.gmatch, string.format, string.char, string.byte, string.rep, string.lower -local lpegmatch, S, C, Ct = lpeg.match, lpeg.S, lpeg.C, lpeg.Ct - --- some functions may disappear as they are not used anywhere - -if not string.split then - - -- this will be overloaded by a faster lpeg variant - - function string.split(str,pattern) - local t = { } - if #str > 0 then - local n = 1 - for s in gmatch(str..pattern,"(.-)"..pattern) do - t[n] = s - n = n + 1 - end - end - return t - end - -end +local sub, gmatch, format, char, byte, rep, lower = string.sub, string.gmatch, string.format, string.char, string.byte, string.rep, string.lower +local lpegmatch, patterns = lpeg.match, lpeg.patterns +local P, S, C, Ct, Cc, Cs = lpeg.P, lpeg.S, lpeg.C, lpeg.Ct, lpeg.Cc, lpeg.Cs + +-- Some functions are already defined in l-lpeg and maybe some from here will +-- move there 
(unless we also expose caches). + +-- if not string.split then +-- +-- function string.split(str,pattern) +-- local t = { } +-- if #str > 0 then +-- local n = 1 +-- for s in gmatch(str..pattern,"(.-)"..pattern) do +-- t[n] = s +-- n = n + 1 +-- end +-- end +-- return t +-- end +-- +-- end + +-- function string.unquoted(str) +-- return (gsub(str,"^([\"\'])(.*)%1$","%2")) -- interesting pattern +-- end + +local unquoted = patterns.squote * C(patterns.nosquote) * patterns.squote + + patterns.dquote * C(patterns.nodquote) * patterns.dquote function string.unquoted(str) - return (gsub(str,"^([\"\'])(.*)%1$","%2")) + return lpegmatch(unquoted,str) or str end ---~ function stringunquoted(str) ---~ if find(str,"^[\'\"]") then ---~ return sub(str,2,-2) ---~ else ---~ return str ---~ end ---~ end +-- print(string.unquoted("test")) +-- print(string.unquoted([["t\"est"]])) +-- print(string.unquoted([["t\"est"x]])) +-- print(string.unquoted("\'test\'")) +-- print(string.unquoted('"test"')) +-- print(string.unquoted('"test"')) function string.quoted(str) return format("%q",str) -- always " @@ -63,65 +69,112 @@ function string.limit(str,n,sentinel) -- not utf proof end end -local space = S(" \t\v\n") -local nospace = 1 - space -local stripper = space^0 * C((space^0 * nospace^1)^0) -- roberto's code +local stripper = patterns.stripper +local collapser = patterns.collapser function string.strip(str) return lpegmatch(stripper,str) or "" end +function string.collapsespaces(str) + return lpegmatch(collapser,str) or "" +end + +-- function string.is_empty(str) +-- return not find(str,"%S") +-- end + +local pattern = P(" ")^0 * P(-1) + function string.is_empty(str) - return not find(str,"%S") + if str == "" then + return true + else + return lpegmatch(pattern,str) and true or false + end end -local patterns_escapes = { - ["%"] = "%%", - ["."] = "%.", - ["+"] = "%+", ["-"] = "%-", ["*"] = "%*", - ["["] = "%[", ["]"] = "%]", - ["("] = "%(", [")"] = "%)", - -- ["{"] = "%{", ["}"] = "%}" - 
-- ["^"] = "%^", ["$"] = "%$", -} -local simple_escapes = { - ["-"] = "%-", - ["."] = "%.", - ["?"] = ".", - ["*"] = ".*", -} +-- if not string.escapedpattern then +-- +-- local patterns_escapes = { +-- ["%"] = "%%", +-- ["."] = "%.", +-- ["+"] = "%+", ["-"] = "%-", ["*"] = "%*", +-- ["["] = "%[", ["]"] = "%]", +-- ["("] = "%(", [")"] = "%)", +-- -- ["{"] = "%{", ["}"] = "%}" +-- -- ["^"] = "%^", ["$"] = "%$", +-- } +-- +-- local simple_escapes = { +-- ["-"] = "%-", +-- ["."] = "%.", +-- ["?"] = ".", +-- ["*"] = ".*", +-- } +-- +-- function string.escapedpattern(str,simple) +-- return (gsub(str,".",simple and simple_escapes or patterns_escapes)) +-- end +-- +-- function string.topattern(str,lowercase,strict) +-- if str == "" then +-- return ".*" +-- else +-- str = gsub(str,".",simple_escapes) +-- if lowercase then +-- str = lower(str) +-- end +-- if strict then +-- return "^" .. str .. "$" +-- else +-- return str +-- end +-- end +-- end +-- +-- end + +--- needs checking + +local anything = patterns.anything +local allescapes = Cc("%") * S(".-+%?()[]*") -- also {} and ^$ ? +local someescapes = Cc("%") * S(".-+%()[]") -- also {} and ^$ ? +local matchescapes = Cc(".") * S("*?") -- wildcard and single match + +local pattern_a = Cs ( ( allescapes + anything )^0 ) +local pattern_b = Cs ( ( someescapes + matchescapes + anything )^0 ) +local pattern_c = Cs ( Cc("^") * ( someescapes + matchescapes + anything )^0 * Cc("$") ) function string.escapedpattern(str,simple) - return (gsub(str,".",simple and simple_escapes or patterns_escapes)) + return lpegmatch(simple and pattern_b or pattern_a,str) end function string.topattern(str,lowercase,strict) if str == "" then return ".*" + elseif strict then + str = lpegmatch(pattern_c,str) else - str = gsub(str,".",simple_escapes) - if lowercase then - str = lower(str) - end - if strict then - return "^" .. str .. 
"$" - else - return str - end + str = lpegmatch(pattern_b,str) + end + if lowercase then + return lower(str) + else + return str end end +-- print(string.escapedpattern("12+34*.tex",false)) +-- print(string.escapedpattern("12+34*.tex",true)) +-- print(string.topattern ("12+34*.tex",false,false)) +-- print(string.topattern ("12+34*.tex",false,true)) function string.valid(str,default) return (type(str) == "string" and str ~= "" and str) or default or nil end --- obsolete names: - -string.quote = string.quoted -string.unquote = string.unquoted - -- handy fallback string.itself = function(s) return s end @@ -133,3 +186,16 @@ local pattern = Ct(C(1)^0) -- string and not utf ! function string.totable(str) return lpegmatch(pattern,str) end + +-- handy from within tex: + +local replacer = lpeg.replacer("@","%%") -- Watch the escaped % in lpeg! + +function string.tformat(fmt,...) + return format(lpegmatch(replacer,fmt),...) +end + +-- obsolete names: + +string.quote = string.quoted +string.unquote = string.unquoted diff --git a/tex/context/base/l-table.lua b/tex/context/base/l-table.lua index 62e0cae14..b668281a8 100644 --- a/tex/context/base/l-table.lua +++ b/tex/context/base/l-table.lua @@ -6,68 +6,23 @@ if not modules then modules = { } end modules ['l-table'] = { license = "see context related readme files" } -local type, next, tostring, tonumber, ipairs = type, next, tostring, tonumber, ipairs +local type, next, tostring, tonumber, ipairs, select = type, next, tostring, tonumber, ipairs, select local table, string = table, string local concat, sort, insert, remove = table.concat, table.sort, table.insert, table.remove -local format, find, gsub, lower, dump, match = string.format, string.find, string.gsub, string.lower, string.dump, string.match +local format, lower, dump = string.format, string.lower, string.dump local getmetatable, setmetatable = getmetatable, setmetatable local getinfo = debug.getinfo - --- Starting with version 5.2 Lua no longer provide ipairs, 
which makes --- sense. As we already used the for loop and # in most places the --- impact on ConTeXt was not that large; the remaining ipairs already --- have been replaced. In a similar fashion we also hardly used pairs. --- --- Hm, actually ipairs was retained, but we no longer use it anyway. --- --- Just in case, we provide the fallbacks as discussed in Programming --- in Lua (http://www.lua.org/pil/7.3.html): - -if not ipairs then - - -- for k, v in ipairs(t) do ... end - -- for k=1,#t do local v = t[k] ... end - - local function iterate(a,i) - i = i + 1 - local v = a[i] - if v ~= nil then - return i, v --, nil - end - end - - function ipairs(a) - return iterate, a, 0 - end - -end - -if not pairs then - - -- for k, v in pairs(t) do ... end - -- for k, v in next, t do ... end - - function pairs(t) - return next, t -- , nil - end - -end - --- Also, unpack has been moved to the table table, and for compatiility --- reasons we provide both now. - -if not table.unpack then - table.unpack = _G.unpack -elseif not unpack then - _G.unpack = table.unpack -end +local lpegmatch, patterns = lpeg.match, lpeg.patterns +local floor = math.floor -- extra functions, some might go (when not used) +local stripper = patterns.stripper + function table.strip(tab) local lst, l = { }, 0 for i=1,#tab do - local s = gsub(tab[i],"^%s*(.-)%s*$","%1") + local s = lpegmatch(stripper,tab[i]) or "" if s == "" then -- skip this one else @@ -176,7 +131,7 @@ local function sortedhash(t) end table.sortedhash = sortedhash -table.sortedpairs = sortedhash +table.sortedpairs = sortedhash -- obsolete function table.append(t,list) local n = #t @@ -200,31 +155,63 @@ function table.prepend(t, list) return t end +-- function table.merge(t, ...) -- first one is target +-- t = t or { } +-- local lst = { ... } +-- for i=1,#lst do +-- for k, v in next, lst[i] do +-- t[k] = v +-- end +-- end +-- return t +-- end + function table.merge(t, ...) -- first one is target t = t or { } - local lst = { ... 
} - for i=1,#lst do - for k, v in next, lst[i] do + for i=1,select("#",...) do + for k, v in next, (select(i,...)) do t[k] = v end end return t end +-- function table.merged(...) +-- local tmp, lst = { }, { ... } +-- for i=1,#lst do +-- for k, v in next, lst[i] do +-- tmp[k] = v +-- end +-- end +-- return tmp +-- end + function table.merged(...) - local tmp, lst = { }, { ... } - for i=1,#lst do - for k, v in next, lst[i] do - tmp[k] = v + local t = { } + for i=1,select("#",...) do + for k, v in next, (select(i,...)) do + t[k] = v end end - return tmp + return t end +-- function table.imerge(t, ...) +-- local lst, nt = { ... }, #t +-- for i=1,#lst do +-- local nst = lst[i] +-- for j=1,#nst do +-- nt = nt + 1 +-- t[nt] = nst[j] +-- end +-- end +-- return t +-- end + function table.imerge(t, ...) - local lst, nt = { ... }, #t - for i=1,#lst do - local nst = lst[i] + local nt = #t + for i=1,select("#",...) do + local nst = select(i,...) for j=1,#nst do nt = nt + 1 t[nt] = nst[j] @@ -233,10 +220,22 @@ function table.imerge(t, ...) return t end +-- function table.imerged(...) +-- local tmp, ntmp, lst = { }, 0, {...} +-- for i=1,#lst do +-- local nst = lst[i] +-- for j=1,#nst do +-- ntmp = ntmp + 1 +-- tmp[ntmp] = nst[j] +-- end +-- end +-- return tmp +-- end + function table.imerged(...) - local tmp, ntmp, lst = { }, 0, {...} - for i=1,#lst do - local nst = lst[i] + local tmp, ntmp = { }, 0 + for i=1,select("#",...) do + local nst = select(i,...) 
for j=1,#nst do ntmp = ntmp + 1 tmp[ntmp] = nst[j] @@ -248,7 +247,7 @@ end local function fastcopy(old,metatabletoo) -- fast one if old then local new = { } - for k,v in next, old do + for k, v in next, old do if type(v) == "table" then new[k] = fastcopy(v,metatabletoo) -- was just table.copy else @@ -302,7 +301,7 @@ end table.fastcopy = fastcopy table.copy = copy -function table.derive(parent) +function table.derive(parent) -- for the moment not public local child = { } if parent then setmetatable(child,{ __index = parent }) @@ -383,6 +382,13 @@ end -- problem: there no good number_to_string converter with the best resolution +-- probably using .. is faster than format +-- maybe split in a few cases (yes/no hexify) + +-- todo: %g faster on numbers than %s + +local propername = patterns.propername -- was find(name,"^%a[%w%_]*$") + local function dummy() end local function do_serialize(root,name,depth,level,indexed) @@ -392,14 +398,14 @@ local function do_serialize(root,name,depth,level,indexed) handle(format("%s{",depth)) else local tn = type(name) - if tn == "number" then -- or find(k,"^%d+$") then + if tn == "number" then if hexify then handle(format("%s[0x%04X]={",depth,name)) else handle(format("%s[%s]={",depth,name)) end elseif tn == "string" then - if noquotes and not reserved[name] and find(name,"^%a[%w%_]*$") then + if noquotes and not reserved[name] and lpegmatch(propername,name) then handle(format("%s%s={",depth,name)) else handle(format("%s[%q]={",depth,name)) @@ -425,7 +431,6 @@ local function do_serialize(root,name,depth,level,indexed) if compact then last = #root for k=1,last do --- if not root[k] then if root[k] == nil then last = k - 1 break @@ -473,7 +478,7 @@ local function do_serialize(root,name,depth,level,indexed) handle(format("%s %s,",depth,tostring(v))) elseif t == "function" then if functions then - handle(format('%s loadstring(%q),',depth,dump(v))) + handle(format('%s load(%q),',depth,dump(v))) else handle(format('%s "function",',depth)) 
end @@ -485,7 +490,7 @@ local function do_serialize(root,name,depth,level,indexed) handle(format("%s __p__=nil,",depth)) end elseif t == "number" then - if tk == "number" then -- or find(k,"^%d+$") then + if tk == "number" then if hexify then handle(format("%s [0x%04X]=0x%04X,",depth,k,v)) else @@ -497,7 +502,7 @@ local function do_serialize(root,name,depth,level,indexed) else handle(format("%s [%s]=%s,",depth,tostring(k),v)) -- %.99g end - elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then + elseif noquotes and not reserved[k] and lpegmatch(propername,k) then if hexify then handle(format("%s %s=0x%04X,",depth,k,v)) else @@ -512,7 +517,7 @@ local function do_serialize(root,name,depth,level,indexed) end elseif t == "string" then if reduce and tonumber(v) then - if tk == "number" then -- or find(k,"^%d+$") then + if tk == "number" then if hexify then handle(format("%s [0x%04X]=%s,",depth,k,v)) else @@ -520,13 +525,13 @@ local function do_serialize(root,name,depth,level,indexed) end elseif tk == "boolean" then handle(format("%s [%s]=%s,",depth,tostring(k),v)) - elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then + elseif noquotes and not reserved[k] and lpegmatch(propername,k) then handle(format("%s %s=%s,",depth,k,v)) else handle(format("%s [%q]=%s,",depth,k,v)) end else - if tk == "number" then -- or find(k,"^%d+$") then + if tk == "number" then if hexify then handle(format("%s [0x%04X]=%q,",depth,k,v)) else @@ -534,7 +539,7 @@ local function do_serialize(root,name,depth,level,indexed) end elseif tk == "boolean" then handle(format("%s [%s]=%q,",depth,tostring(k),v)) - elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then + elseif noquotes and not reserved[k] and lpegmatch(propername,k) then handle(format("%s %s=%q,",depth,k,v)) else handle(format("%s [%q]=%q,",depth,k,v)) @@ -542,7 +547,7 @@ local function do_serialize(root,name,depth,level,indexed) end elseif t == "table" then if not next(v) then - if tk == "number" then 
-- or find(k,"^%d+$") then + if tk == "number" then if hexify then handle(format("%s [0x%04X]={},",depth,k)) else @@ -550,7 +555,7 @@ local function do_serialize(root,name,depth,level,indexed) end elseif tk == "boolean" then handle(format("%s [%s]={},",depth,tostring(k))) - elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then + elseif noquotes and not reserved[k] and lpegmatch(propername,k) then handle(format("%s %s={},",depth,k)) else handle(format("%s [%q]={},",depth,k)) @@ -558,15 +563,15 @@ local function do_serialize(root,name,depth,level,indexed) elseif inline then local st = simple_table(v) if st then - if tk == "number" then -- or find(k,"^%d+$") then + if tk == "number" then if hexify then handle(format("%s [0x%04X]={ %s },",depth,k,concat(st,", "))) else handle(format("%s [%s]={ %s },",depth,k,concat(st,", "))) end - elseif tk == "boolean" then -- or find(k,"^%d+$") then + elseif tk == "boolean" then handle(format("%s [%s]={ %s },",depth,tostring(k),concat(st,", "))) - elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then + elseif noquotes and not reserved[k] and lpegmatch(propername,k) then handle(format("%s %s={ %s },",depth,k,concat(st,", "))) else handle(format("%s [%q]={ %s },",depth,k,concat(st,", "))) @@ -578,15 +583,15 @@ local function do_serialize(root,name,depth,level,indexed) do_serialize(v,k,depth,level+1) end elseif t == "boolean" then - if tk == "number" then -- or find(k,"^%d+$") then + if tk == "number" then if hexify then handle(format("%s [0x%04X]=%s,",depth,k,tostring(v))) else handle(format("%s [%s]=%s,",depth,k,tostring(v))) end - elseif tk == "boolean" then -- or find(k,"^%d+$") then + elseif tk == "boolean" then handle(format("%s [%s]=%s,",depth,tostring(k),tostring(v))) - elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then + elseif noquotes and not reserved[k] and lpegmatch(propername,k) then handle(format("%s %s=%s,",depth,k,tostring(v))) else handle(format("%s 
[%q]=%s,",depth,k,tostring(v))) @@ -595,30 +600,30 @@ local function do_serialize(root,name,depth,level,indexed) if functions then local f = getinfo(v).what == "C" and dump(dummy) or dump(v) -- local f = getinfo(v).what == "C" and dump(function(...) return v(...) end) or dump(v) - if tk == "number" then -- or find(k,"^%d+$") then + if tk == "number" then if hexify then - handle(format("%s [0x%04X]=loadstring(%q),",depth,k,f)) + handle(format("%s [0x%04X]=load(%q),",depth,k,f)) else - handle(format("%s [%s]=loadstring(%q),",depth,k,f)) + handle(format("%s [%s]=load(%q),",depth,k,f)) end elseif tk == "boolean" then - handle(format("%s [%s]=loadstring(%q),",depth,tostring(k),f)) - elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then - handle(format("%s %s=loadstring(%q),",depth,k,f)) + handle(format("%s [%s]=load(%q),",depth,tostring(k),f)) + elseif noquotes and not reserved[k] and lpegmatch(propername,k) then + handle(format("%s %s=load(%q),",depth,k,f)) else - handle(format("%s [%q]=loadstring(%q),",depth,k,f)) + handle(format("%s [%q]=load(%q),",depth,k,f)) end end else - if tk == "number" then -- or find(k,"^%d+$") then + if tk == "number" then if hexify then handle(format("%s [0x%04X]=%q,",depth,k,tostring(v))) else handle(format("%s [%s]=%q,",depth,k,tostring(v))) end - elseif tk == "boolean" then -- or find(k,"^%d+$") then + elseif tk == "boolean" then handle(format("%s [%s]=%q,",depth,tostring(k),tostring(v))) - elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then + elseif noquotes and not reserved[k] and lpegmatch(propername,k) then handle(format("%s %s=%q,",depth,k,tostring(v))) else handle(format("%s [%q]=%q,",depth,k,tostring(v))) @@ -699,14 +704,14 @@ local function serialize(_handle,root,name,specification) -- handle wins handle("}") end ---~ name: ---~ ---~ true : return { } ---~ false : { } ---~ nil : t = { } ---~ string : string = { } ---~ 'return' : return { } ---~ number : [number] = { } +-- name: +-- +-- true : return { 
} +-- false : { } +-- nil : t = { } +-- string : string = { } +-- "return" : return { } +-- number : [number] = { } function table.serialize(root,name,specification) local t, n = { }, 0 @@ -795,7 +800,7 @@ table.flattened = flattened local function unnest(t,f) -- only used in mk, for old times sake if not f then -- and only relevant for token lists - f = { } + f = { } -- this one can become obsolete end for i=1,#t do local v = t[i] @@ -824,7 +829,7 @@ local function are_equal(a,b,n,m) -- indexed local ai, bi = a[i], b[i] if ai==bi then -- same - elseif type(ai)=="table" and type(bi)=="table" then + elseif type(ai) == "table" and type(bi) == "table" then if not are_equal(ai,bi) then return false end @@ -859,10 +864,10 @@ table.are_equal = are_equal -- maybe also make a combined one -function table.compact(t) +function table.compact(t) -- remove empty tables, assumes subtables if t then - for k,v in next, t do - if not next(v) then + for k, v in next, t do + if not next(v) then -- no type checking t[k] = nil end end @@ -901,7 +906,7 @@ function table.swapped(t,s) -- hash return n end -function table.mirror(t) -- hash +function table.mirrored(t) -- hash local n = { } for k, v in next, t do n[v] = k @@ -924,6 +929,17 @@ function table.reversed(t) end end +function table.reverse(t) + if t then + local n = #t + for i=1,floor(n/2) do + local j = n - i + 1 + t[i], t[j] = t[j], t[i] + end + return t + end +end + function table.sequenced(t,sep) -- hash only if t then local s, n = { }, 0 diff --git a/tex/context/base/l-unicode.lua b/tex/context/base/l-unicode.lua index 7fd380b88..7c452ef8f 100644 --- a/tex/context/base/l-unicode.lua +++ b/tex/context/base/l-unicode.lua @@ -10,29 +10,45 @@ if not modules then modules = { } end modules ['l-unicode'] = { -- todo: utf.sub replacement (used in syst-aux) -local concat = table.concat +-- we put these in the utf namespace: + +utf = utf or (unicode and unicode.utf8) or { } + +utf.characters = utf.characters or string.utfcharacters 
+utf.values = utf.values or string.utfvalues + +-- string.utfvalues +-- string.utfcharacters +-- string.characters +-- string.characterpairs +-- string.bytes +-- string.bytepairs + local type = type -local P, C, R, Cs, Ct, Cmt = lpeg.P, lpeg.C, lpeg.R, lpeg.Cs, lpeg.Ct, lpeg.Cmt +local char, byte, format, sub = string.char, string.byte, string.format, string.sub +local concat = table.concat +local P, C, R, Cs, Ct, Cmt, Cc, Carg = lpeg.P, lpeg.C, lpeg.R, lpeg.Cs, lpeg.Ct, lpeg.Cmt, lpeg.Cc, lpeg.Carg local lpegmatch, patterns = lpeg.match, lpeg.patterns -local utftype = patterns.utftype -local char, byte, find, bytepairs, utfvalues, format, sub = string.char, string.byte, string.find, string.bytepairs, string.utfvalues, string.format, string.sub -local utfsplitlines = string.utfsplitlines -if not unicode then +local bytepairs = string.bytepairs - unicode = { } +local finder = lpeg.finder +local replacer = lpeg.replacer -end - -local unicode = unicode +local utfvalues = utf.values +local utfgmatch = utf.gmatch -- not always present -utf = utf or unicode.utf8 +local p_utftype = patterns.utftype +local p_utfoffset = patterns.utfoffset +local p_utf8char = patterns.utf8char +local p_utf8byte = patterns.utf8byte +local p_utfbom = patterns.utfbom +local p_newline = patterns.newline +local p_whitespace = patterns.whitespace -if not utf then +if not unicode then - utf8 = { } - unicode.utf8 = utf8 - utf = utf8 + unicode = { utf = utf } -- for a while end @@ -89,64 +105,13 @@ if not utf.byte then end -if not utf.sub then - - local utf8char = patterns.utf8char - - -- inefficient as lpeg just copies ^n - - -- local function sub(str,start,stop) - -- local pattern = utf8char^-(start-1) * C(utf8char^-(stop-start+1)) - -- inspect(pattern) - -- return lpegmatch(pattern,str) or "" - -- end - - local b, e, n, first, last = 0, 0, 0, 0, 0 - - local function slide(s,p) - n = n + 1 - if n == first then - b = p - if not last then - return nil - end - end - if n == last then - e = p - return 
nil - else - return p - end - end - - local pattern = Cmt(utf8char,slide)^0 - - function utf.sub(str,start,stop) -- todo: from the end - if not start then - return str - end - b, e, n, first, last = 0, 0, 0, start, stop - lpegmatch(pattern,str) - if not stop then - return sub(str,b) - else - return sub(str,b,e) - end - end - - -- print(utf.sub("Hans Hagen is my name")) - -- print(utf.sub("Hans Hagen is my name",5)) - -- print(utf.sub("Hans Hagen is my name",5,10)) - -end - local utfchar, utfbyte = utf.char, utf.byte -- As we want to get rid of the (unmaintained) utf library we implement our own -- variants (in due time an independent module): -function unicode.filetype(data) - return data and lpegmatch(utftype,data) or "unknown" +function utf.filetype(data) + return data and lpegmatch(p_utftype,data) or "unknown" end local toentities = Cs ( @@ -257,7 +222,7 @@ local pattern = P("\254\255") * Cs( ( + one )^1 ) -function string.toutf(s) +function string.toutf(s) -- in string namespace return lpegmatch(pattern,s) or s -- todo: utf32 end @@ -273,26 +238,269 @@ local validatedutf = Cs ( patterns.validatedutf = validatedutf -function string.validutf(str) - return lpegmatch(validatedutf,str) +function utf.is_valid(str) + return type(str) == "string" and lpegmatch(validatedutf,str) or false end +if not utf.len then -utf.length = string.utflength -utf.split = string.utfsplit -utf.splitines = string.utfsplitlines -utf.valid = string.validutf + -- -- alternative 1: 0.77 + -- + -- local utfcharcounter = utfbom^-1 * Cs((p_utf8char/'!')^0) + -- + -- function utf.len(str) + -- return #lpegmatch(utfcharcounter,str or "") + -- end + -- + -- -- alternative 2: 1.70 + -- + -- local n = 0 + -- + -- local utfcharcounter = utfbom^-1 * (p_utf8char/function() n = n + 1 end)^0 -- slow + -- + -- function utf.length(str) + -- n = 0 + -- lpegmatch(utfcharcounter,str or "") + -- return n + -- end + -- + -- -- alternative 3: 0.24 (native unicode.utf8.len: 0.047) + + -- local n = 0 + -- + -- -- 
local utfcharcounter = lpeg.patterns.utfbom^-1 * P ( ( Cp() * ( + -- -- patterns.utf8one ^1 * Cc(1) + -- -- + patterns.utf8two ^1 * Cc(2) + -- -- + patterns.utf8three^1 * Cc(3) + -- -- + patterns.utf8four ^1 * Cc(4) ) * Cp() / function(f,d,t) n = n + (t - f)/d end + -- -- )^0 ) -- just as many captures as below + -- + -- -- local utfcharcounter = lpeg.patterns.utfbom^-1 * P ( ( + -- -- (Cmt(patterns.utf8one ^1,function(_,_,s) n = n + #s return true end)) + -- -- + (Cmt(patterns.utf8two ^1,function(_,_,s) n = n + #s/2 return true end)) + -- -- + (Cmt(patterns.utf8three^1,function(_,_,s) n = n + #s/3 return true end)) + -- -- + (Cmt(patterns.utf8four ^1,function(_,_,s) n = n + #s/4 return true end)) + -- -- )^0 ) -- not interesting as it creates strings but sometimes faster + -- + -- -- The best so far: + -- + -- local utfcharcounter = utfbom^-1 * P ( ( + -- Cp() * (patterns.utf8one )^1 * Cp() / function(f,t) n = n + t - f end + -- + Cp() * (patterns.utf8two )^1 * Cp() / function(f,t) n = n + (t - f)/2 end + -- + Cp() * (patterns.utf8three)^1 * Cp() / function(f,t) n = n + (t - f)/3 end + -- + Cp() * (patterns.utf8four )^1 * Cp() / function(f,t) n = n + (t - f)/4 end + -- )^0 ) + + -- function utf.len(str) + -- n = 0 + -- lpegmatch(utfcharcounter,str or "") + -- return n + -- end + + local n, f = 0, 1 + + local utfcharcounter = patterns.utfbom^-1 * Cmt ( + Cc(1) * patterns.utf8one ^1 + + Cc(2) * patterns.utf8two ^1 + + Cc(3) * patterns.utf8three^1 + + Cc(4) * patterns.utf8four ^1, + function(_,t,d) -- due to Cc no string captures, so faster + n = n + (t - f)/d + f = t + return true + end + )^0 + + function utf.len(str) + n, f = 0, 1 + lpegmatch(utfcharcounter,str or "") + return n + end -if not utf.len then - utf.len = utf.length end --- a replacement for simple gsubs: +utf.length = utf.len + +if not utf.sub then + + -- inefficient as lpeg just copies ^n + + -- local function sub(str,start,stop) + -- local pattern = p_utf8char^-(start-1) * 
C(p_utf8char^-(stop-start+1)) + -- inspect(pattern) + -- return lpegmatch(pattern,str) or "" + -- end + + -- local b, e, n, first, last = 0, 0, 0, 0, 0 + -- + -- local function slide(s,p) + -- n = n + 1 + -- if n == first then + -- b = p + -- if not last then + -- return nil + -- end + -- end + -- if n == last then + -- e = p + -- return nil + -- else + -- return p + -- end + -- end + -- + -- local pattern = Cmt(p_utf8char,slide)^0 + -- + -- function utf.sub(str,start,stop) -- todo: from the end + -- if not start then + -- return str + -- end + -- b, e, n, first, last = 0, 0, 0, start, stop + -- lpegmatch(pattern,str) + -- if not stop then + -- return sub(str,b) + -- else + -- return sub(str,b,e-1) + -- end + -- end + + -- print(utf.sub("Hans Hagen is my name")) + -- print(utf.sub("Hans Hagen is my name",5)) + -- print(utf.sub("Hans Hagen is my name",5,10)) + + local utflength = utf.length + + -- also negative indices, upto 10 times slower than a c variant + + local b, e, n, first, last = 0, 0, 0, 0, 0 + + local function slide_zero(s,p) + n = n + 1 + if n >= last then + e = p - 1 + else + return p + end + end + + local function slide_one(s,p) + n = n + 1 + if n == first then + b = p + end + if n >= last then + e = p - 1 + else + return p + end + end + + local function slide_two(s,p) + n = n + 1 + if n == first then + b = p + else + return true + end + end + + local pattern_zero = Cmt(p_utf8char,slide_zero)^0 + local pattern_one = Cmt(p_utf8char,slide_one )^0 + local pattern_two = Cmt(p_utf8char,slide_two )^0 + + function utf.sub(str,start,stop) + if not start then + return str + end + if start == 0 then + start = 1 + end + if not stop then + if start < 0 then + local l = utflength(str) -- we can inline this function if needed + start = l + start + else + start = start - 1 + end + b, n, first = 0, 0, start + lpegmatch(pattern_two,str) + if n >= first then + return sub(str,b) + else + return "" + end + end + if start < 0 or stop < 0 then + local l = utf.length(str) + 
if start < 0 then + start = l + start + if start <= 0 then + start = 1 + else + start = start + 1 + end + end + if stop < 0 then + stop = l + stop + if stop == 0 then + stop = 1 + else + stop = stop + 1 + end + end + end + if start > stop then + return "" + elseif start > 1 then + b, e, n, first, last = 0, 0, 0, start - 1, stop + lpegmatch(pattern_one,str) + if n >= first and e == 0 then + e = #str + end + return sub(str,b,e) + else + b, e, n, last = 1, 0, 0, stop + lpegmatch(pattern_zero,str) + if e == 0 then + e = #str + end + return sub(str,b,e) + end + end -local utf8char = patterns.utf8char + -- local n = 100000 + -- local str = string.rep("123456àáâãäå",100) + -- + -- for i=-15,15,1 do + -- for j=-15,15,1 do + -- if utf.xsub(str,i,j) ~= utf.sub(str,i,j) then + -- print("error",i,j,"l>"..utf.xsub(str,i,j),"s>"..utf.sub(str,i,j)) + -- end + -- end + -- if utf.xsub(str,i) ~= utf.sub(str,i) then + -- print("error",i,"l>"..utf.xsub(str,i),"s>"..utf.sub(str,i)) + -- end + -- end + + -- print(" 1, 7",utf.xsub(str, 1, 7),utf.sub(str, 1, 7)) + -- print(" 0, 7",utf.xsub(str, 0, 7),utf.sub(str, 0, 7)) + -- print(" 0, 9",utf.xsub(str, 0, 9),utf.sub(str, 0, 9)) + -- print(" 4 ",utf.xsub(str, 4 ),utf.sub(str, 4 )) + -- print(" 0 ",utf.xsub(str, 0 ),utf.sub(str, 0 )) + -- print(" 0, 0",utf.xsub(str, 0, 0),utf.sub(str, 0, 0)) + -- print(" 4, 4",utf.xsub(str, 4, 4),utf.sub(str, 4, 4)) + -- print(" 4, 0",utf.xsub(str, 4, 0),utf.sub(str, 4, 0)) + -- print("-3, 0",utf.xsub(str,-3, 0),utf.sub(str,-3, 0)) + -- print(" 0,-3",utf.xsub(str, 0,-3),utf.sub(str, 0,-3)) + -- print(" 5,-3",utf.xsub(str,-5,-3),utf.sub(str,-5,-3)) + -- print("-3 ",utf.xsub(str,-3 ),utf.sub(str,-3 )) + +end + +-- a replacement for simple gsubs: function utf.remapper(mapping) - local pattern = Cs((utf8char/mapping)^0) + local pattern = Cs((p_utf8char/mapping)^0) return function(str) if not str or str == "" then return "" @@ -305,158 +513,113 @@ end -- local remap = utf.remapper { a = 'd', b = "c", c = "b", d 
= "a" } -- print(remap("abcd 1234 abcd")) +-- + +function utf.replacer(t) -- no precheck, always string builder + local r = replacer(t,false,false,true) + return function(str) + return lpegmatch(r,str) + end +end + +function utf.subtituter(t) -- with precheck and no building if no match + local f = finder (t) + local r = replacer(t,false,false,true) + return function(str) + local i = lpegmatch(f,str) + if not i then + return str + elseif i > #str then + return str + else + -- return sub(str,1,i-2) .. lpegmatch(r,str,i-1) -- slower + return lpegmatch(r,str) + end + end +end + +-- inspect(utf.split("a b c d")) +-- inspect(utf.split("a b c d",true)) + +local utflinesplitter = p_utfbom^-1 * lpeg.tsplitat(p_newline) +local utfcharsplitter_ows = p_utfbom^-1 * Ct(C(p_utf8char)^0) +local utfcharsplitter_iws = p_utfbom^-1 * Ct((p_whitespace^1 + C(p_utf8char))^0) +local utfcharsplitter_raw = Ct(C(p_utf8char)^0) + +patterns.utflinesplitter = utflinesplitter + +function utf.splitlines(str) + return lpegmatch(utflinesplitter,str or "") +end + +function utf.split(str,ignorewhitespace) -- new + if ignorewhitespace then + return lpegmatch(utfcharsplitter_iws,str or "") + else + return lpegmatch(utfcharsplitter_ows,str or "") + end +end + +function utf.totable(str) -- keeps bom + return lpegmatch(utfcharsplitter_raw,str) +end + -- 0 EF BB BF UTF-8 -- 1 FF FE UTF-16-little-endian -- 2 FE FF UTF-16-big-endian -- 3 FF FE 00 00 UTF-32-little-endian -- 4 00 00 FE FF UTF-32-big-endian - -unicode.utfname = { - [0] = 'utf-8', - [1] = 'utf-16-le', - [2] = 'utf-16-be', - [3] = 'utf-32-le', - [4] = 'utf-32-be' -} - +-- -- \000 fails in <= 5.0 but is valid in >=5.1 where %z is depricated -function unicode.utftype(f) - local str = f:read(4) - if not str then - f:seek('set') - return 0 - -- elseif find(str,"^%z%z\254\255") then -- depricated - -- elseif find(str,"^\000\000\254\255") then -- not permitted and bugged - elseif find(str,"\000\000\254\255",1,true) then -- seems to work okay (TH) - 
return 4 - -- elseif find(str,"^\255\254%z%z") then -- depricated - -- elseif find(str,"^\255\254\000\000") then -- not permitted and bugged - elseif find(str,"\255\254\000\000",1,true) then -- seems to work okay (TH) - return 3 - elseif find(str,"^\254\255") then - f:seek('set',2) - return 2 - elseif find(str,"^\255\254") then - f:seek('set',2) - return 1 - elseif find(str,"^\239\187\191") then - f:seek('set',3) - return 0 - else - f:seek('set') - return 0 +-- utf.name = { +-- [0] = 'utf-8', +-- [1] = 'utf-16-le', +-- [2] = 'utf-16-be', +-- [3] = 'utf-32-le', +-- [4] = 'utf-32-be' +-- } +-- +-- function utf.magic(f) +-- local str = f:read(4) +-- if not str then +-- f:seek('set') +-- return 0 +-- -- elseif find(str,"^%z%z\254\255") then -- depricated +-- -- elseif find(str,"^\000\000\254\255") then -- not permitted and bugged +-- elseif find(str,"\000\000\254\255",1,true) then -- seems to work okay (TH) +-- return 4 +-- -- elseif find(str,"^\255\254%z%z") then -- depricated +-- -- elseif find(str,"^\255\254\000\000") then -- not permitted and bugged +-- elseif find(str,"\255\254\000\000",1,true) then -- seems to work okay (TH) +-- return 3 +-- elseif find(str,"^\254\255") then +-- f:seek('set',2) +-- return 2 +-- elseif find(str,"^\255\254") then +-- f:seek('set',2) +-- return 1 +-- elseif find(str,"^\239\187\191") then +-- f:seek('set',3) +-- return 0 +-- else +-- f:seek('set') +-- return 0 +-- end +-- end + +function utf.magic(f) -- not used + local str = f:read(4) or "" + local off = lpegmatch(p_utfoffset,str) + if off < 4 then + f:seek('set',off) end + return lpegmatch(p_utftype,str) end ---~ function unicode.utf16_to_utf8(str, endian) -- maybe a gsub is faster or an lpeg ---~ local result, tmp, n, m, p, r, t = { }, { }, 0, 0, 0, 0, 0 -- we reuse tmp ---~ -- lf | cr | crlf / (cr:13, lf:10) ---~ local function doit() -- inline this ---~ if n == 10 then ---~ if p ~= 13 then ---~ if t > 0 then ---~ r = r + 1 ---~ result[r] = concat(tmp,"",1,t) ---~ t = 0 ---~ end 
---~ p = 0 ---~ end ---~ elseif n == 13 then ---~ if t > 0 then ---~ r = r + 1 ---~ result[r] = concat(tmp,"",1,t) ---~ t = 0 ---~ end ---~ p = n ---~ else ---~ t = t + 1 ---~ tmp[t] = utfchar(n) ---~ p = 0 ---~ end ---~ end ---~ for l,r in bytepairs(str) do ---~ if r then ---~ if endian then -- maybe make two loops ---~ n = 256*l + r ---~ else ---~ n = 256*r + l ---~ end ---~ if m > 0 then ---~ n = (m-0xD800)*0x400 + (n-0xDC00) + 0x10000 ---~ m = 0 ---~ doit() ---~ elseif n >= 0xD800 and n <= 0xDBFF then ---~ m = n ---~ else ---~ doit() ---~ end ---~ end ---~ end ---~ if t > 0 then ---~ r = r + 1 ---~ result[r] = concat(tmp,"",1,t) -- we reused tmp, hence t ---~ end ---~ return result ---~ end - ---~ function unicode.utf32_to_utf8(str, endian) ---~ local result, tmp, n, m, p, r, t = { }, { }, 0, -1, 0, 0, 0 ---~ -- lf | cr | crlf / (cr:13, lf:10) ---~ local function doit() -- inline this ---~ if n == 10 then ---~ if p ~= 13 then ---~ if t > 0 then ---~ r = r + 1 ---~ result[r] = concat(tmp,"",1,t) ---~ t = 0 ---~ end ---~ p = 0 ---~ end ---~ elseif n == 13 then ---~ if t > 0 then ---~ r = r + 1 ---~ result[r] = concat(tmp,"",1,t) ---~ t = 0 ---~ end ---~ p = n ---~ else ---~ t = t + 1 ---~ tmp[t] = utfchar(n) ---~ p = 0 ---~ end ---~ end ---~ for a,b in bytepairs(str) do ---~ if a and b then ---~ if m < 0 then ---~ if endian then -- maybe make two loops ---~ m = 256*256*256*a + 256*256*b ---~ else ---~ m = 256*b + a ---~ end ---~ else ---~ if endian then -- maybe make two loops ---~ n = m + 256*a + b ---~ else ---~ n = m + 256*256*256*b + 256*256*a ---~ end ---~ m = -1 ---~ doit() ---~ end ---~ else ---~ break ---~ end ---~ end ---~ if #tmp > 0 then ---~ r = r + 1 ---~ result[r] = concat(tmp,"",1,t) -- we reused tmp, hence t ---~ end ---~ return result ---~ end - local function utf16_to_utf8_be(t) if type(t) == "string" then - t = utfsplitlines(str) + t = lpegmatch(utflinesplitter,t) end local result = { } -- we reuse result for i=1,#t do @@ -484,7 +647,7 @@ end 
local function utf16_to_utf8_le(t) if type(t) == "string" then - t = utfsplitlines(str) + t = lpegmatch(utflinesplitter,t) end local result = { } -- we reuse result for i=1,#t do @@ -512,7 +675,7 @@ end local function utf32_to_utf8_be(t) if type(t) == "string" then - t = utfsplitlines(t) + t = lpegmatch(utflinesplitter,t) end local result = { } -- we reuse result for i=1,#t do @@ -537,7 +700,7 @@ end local function utf32_to_utf8_le(t) if type(t) == "string" then - t = utfsplitlines(t) + t = lpegmatch(utflinesplitter,t) end local result = { } -- we reuse result for i=1,#t do @@ -560,20 +723,20 @@ local function utf32_to_utf8_le(t) return t end -unicode.utf32_to_utf8_be = utf32_to_utf8_be -unicode.utf32_to_utf8_le = utf32_to_utf8_le -unicode.utf16_to_utf8_be = utf16_to_utf8_be -unicode.utf16_to_utf8_le = utf16_to_utf8_le +utf.utf32_to_utf8_be = utf32_to_utf8_be +utf.utf32_to_utf8_le = utf32_to_utf8_le +utf.utf16_to_utf8_be = utf16_to_utf8_be +utf.utf16_to_utf8_le = utf16_to_utf8_le -function unicode.utf8_to_utf8(t) - return type(t) == "string" and utfsplitlines(t) or t +function utf.utf8_to_utf8(t) + return type(t) == "string" and lpegmatch(utflinesplitter,t) or t end -function unicode.utf16_to_utf8(t,endian) +function utf.utf16_to_utf8(t,endian) return endian and utf16_to_utf8_be(t) or utf16_to_utf8_le(t) or t end -function unicode.utf32_to_utf8(t,endian) +function utf.utf32_to_utf8(t,endian) return endian and utf32_to_utf8_be(t) or utf32_to_utf8_le(t) or t end @@ -599,7 +762,7 @@ local function big(c) end end --- function unicode.utf8_to_utf16(str,littleendian) +-- function utf.utf8_to_utf16(str,littleendian) -- if littleendian then -- return char(255,254) .. utfgsub(str,".",little) -- else @@ -610,7 +773,7 @@ end local _, l_remap = utf.remapper(little) local _, b_remap = utf.remapper(big) -function unicode.utf8_to_utf16(str,littleendian) +function utf.utf8_to_utf16(str,littleendian) if littleendian then return char(255,254) .. 
lpegmatch(l_remap,str) else @@ -618,27 +781,67 @@ function unicode.utf8_to_utf16(str,littleendian) end end -function unicode.utfcodes(str) - local t, n = { }, 0 - for u in utfvalues(str) do - n = n + 1 - t[n] = format("0x%04X",u) - end - return concat(t,separator or " ") +-- function utf.tocodes(str,separator) -- can be sped up with an lpeg +-- local t, n = { }, 0 +-- for u in utfvalues(str) do +-- n = n + 1 +-- t[n] = format("0x%04X",u) +-- end +-- return concat(t,separator or " ") +-- end + +local pattern = Cs ( + (p_utf8byte / function(unicode ) return format( "0x%04X", unicode) end) * + (p_utf8byte * Carg(1) / function(unicode,separator) return format("%s0x%04X",separator,unicode) end)^0 +) + +function utf.tocodes(str,separator) + return lpegmatch(pattern,str,1,separator or " ") end -function unicode.ustring(s) +function utf.ustring(s) return format("U+%05X",type(s) == "number" and s or utfbyte(s)) end -function unicode.xstring(s) +function utf.xstring(s) return format("0x%05X",type(s) == "number" and s or utfbyte(s)) end -- -local pattern = Ct(C(patterns.utf8char)^0) +local p_nany = p_utf8char / "" + +if utfgmatch then + + function utf.count(str,what) + if type(what) == "string" then + local n = 0 + for _ in utfgmatch(str,what) do + n = n + 1 + end + return n + else -- 4 times slower but still faster than / function + return #lpegmatch(Cs((P(what)/" " + p_nany)^0),str) + end + end + +else + + local cache = { } + + function utf.count(str,what) + if type(what) == "string" then + local p = cache[what] + if not p then + p = Cs((P(what)/" " + p_nany)^0) + cache[p] = p + end + return #lpegmatch(p,str) + else -- 4 times slower but still faster than / function + return #lpegmatch(Cs((P(what)/" " + p_nany)^0),str) + end + end -function utf.totable(str) - return lpegmatch(pattern,str) end + +-- maybe also register as string.utf* diff --git a/tex/context/base/lang-ini.lua b/tex/context/base/lang-ini.lua index eaedcd69a..02a33c181 100644 --- 
a/tex/context/base/lang-ini.lua +++ b/tex/context/base/lang-ini.lua @@ -17,7 +17,6 @@ if not modules then modules = { } end modules ['lang-ini'] = { --~ lang:hyphenation(string) string = lang:hyphenation() lang:clear_hyphenation() local type, tonumber = type, tonumber -local utf = unicode.utf8 local utfbyte = utf.byte local format, gsub = string.format, string.gsub local concat, sortedkeys, sortedpairs = table.concat, table.sortedkeys, table.sortedpairs diff --git a/tex/context/base/lang-url.lua b/tex/context/base/lang-url.lua index 3b354216a..35381e672 100644 --- a/tex/context/base/lang-url.lua +++ b/tex/context/base/lang-url.lua @@ -6,10 +6,7 @@ if not modules then modules = { } end modules ['lang-url'] = { license = "see context related readme files" } -local utf = unicode.utf8 - -local utfcharacters, utfvalues = string.utfcharacters, string.utfvalues -local utfbyte, utfchar = utf.byte, utf.char +local utfcharacters, utfvalues, utfbyte, utfchar = utf.characters, utf.values, utf.byte, utf.char context = context diff --git a/tex/context/base/lang-wrd.lua b/tex/context/base/lang-wrd.lua index c5bc75ca2..84d6107d4 100644 --- a/tex/context/base/lang-wrd.lua +++ b/tex/context/base/lang-wrd.lua @@ -6,8 +6,8 @@ if not modules then modules = { } end modules ['lang-wrd'] = { license = "see context related readme files" } -local utf = unicode.utf8 -local lower, utfchar = string.lower, utf.char +local lower = string.lower +local utfchar = utf.char local concat = table.concat local lpegmatch = lpeg.match local P, S, Cs = lpeg.P, lpeg.S, lpeg.Cs diff --git a/tex/context/base/lpdf-epa.lua b/tex/context/base/lpdf-epa.lua index 03a36f2dc..93e494a31 100644 --- a/tex/context/base/lpdf-epa.lua +++ b/tex/context/base/lpdf-epa.lua @@ -12,6 +12,8 @@ if not modules then modules = { } end modules ['lpdf-epa'] = { local type, tonumber = type, tonumber local format, gsub = string.format, string.gsub +----- lpegmatch, lpegpatterns = lpeg.match, lpeg.patterns + local trace_links = false 
trackers.register("figures.links", function(v) trace_links = v end) local report_link = logs.reporter("backend","merging") @@ -20,6 +22,9 @@ local backends, lpdf = backends, lpdf local variables = interfaces.variables local codeinjections = backends.pdf.codeinjections +----- urlescaper = lpegpatterns.urlescaper +----- utftohigh = lpegpatterns.utftohigh +local escapetex = characters.filters.utf.private.escape local layerspec = { -- predefining saves time "epdflinks" @@ -53,19 +58,22 @@ local function add_link(x,y,w,h,destination,what) end local function link_goto(x,y,w,h,document,annotation,pagedata,namespace) - local destination = annotation.A.D -- [ 18 0 R /Fit ] - local what = "page" - if type(destination) == "string" then - local destinations = document.destinations - local wanted = destinations[destination] - destination = wanted and wanted.D - if destination then what = "named" end - end - local pagedata = destination and destination[1] - if pagedata then - local destinationpage = pagedata.number - if destinationpage then - add_link(x,y,w,h,namespace .. destinationpage,what) + local a = annotation.A + if a then + local destination = a.D -- [ 18 0 R /Fit ] + local what = "page" + if type(destination) == "string" then + local destinations = document.destinations + local wanted = destinations[destination] + destination = wanted and wanted.D + if destination then what = "named" end + end + local pagedata = destination and destination[1] + if pagedata then + local destinationpage = pagedata.number + if destinationpage then + add_link(x,y,w,h,namespace .. 
destinationpage,what) + end end end end @@ -73,24 +81,31 @@ end local function link_uri(x,y,w,h,document,annotation) local url = annotation.A.URI if url then + -- url = lpegmatch(urlescaper,url) + -- url = lpegmatch(utftohigh,url) + url = escapetex(url) add_link(x,y,w,h,format("url(%s)",url),"url") end end local function link_file(x,y,w,h,document,annotation) - local filename = annotation.A.F - if filename then - local destination = annotation.A.D - if not destination then - add_link(x,y,w,h,format("file(%s)",filename),"file") - elseif type(destination) == "string" then - add_link(x,y,w,h,format("%s::%s",filename,destination),"file (named)") - else - destination = destination[1] -- array - if tonumber(destination) then - add_link(x,y,w,h,format("%s::page(%s)",filename,destination),"file (page)") - else + local a = annotation.A + if a then + local filename = a.F + if filename then + filename = escapetex(filename) + local destination = a.D + if not destination then add_link(x,y,w,h,format("file(%s)",filename),"file") + elseif type(destination) == "string" then + add_link(x,y,w,h,format("%s::%s",filename,destination),"file (named)") + else + destination = destination[1] -- array + if tonumber(destination) then + add_link(x,y,w,h,format("%s::page(%s)",filename,destination),"file (page)") + else + add_link(x,y,w,h,format("file(%s)",filename),"file") + end end end end @@ -110,41 +125,50 @@ function codeinjections.mergereferences(specification) local yscale = specification.yscale or 1 local size = specification.size or "crop" -- todo local pagedata = document.pages[pagenumber] - local annotations = pagedata.Annots + local annotations = pagedata and pagedata.Annots if annotations and annotations.n > 0 then - local namespace = format("lpdf-epa-%s-",file.removesuffix(file.basename(fullname))) - local reference = namespace .. pagenumber + local namespace = format("lpdf-epa-%s-",file.removesuffix(file.basename(fullname))) + local reference = namespace .. 
pagenumber local mediabox = pagedata.MediaBox local llx, lly, urx, ury = mediabox[1], mediabox[2], mediabox[3], mediabox[4] local width, height = xscale * (urx - llx), yscale * (ury - lly) -- \\overlaywidth, \\overlayheight context.definelayer( { "epdflinks" }, { height = height.."bp" , width = width.."bp" }) for i=1,annotations.n do local annotation = annotations[i] - local subtype = annotation.Subtype - local rectangle = annotation.Rect - local a_llx, a_lly, a_urx, a_ury = rectangle[1], rectangle[2], rectangle[3], rectangle[4] - local x, y = xscale * (a_llx - llx), yscale * (a_lly - lly) - local w, h = xscale * (a_urx - a_llx), yscale * (a_ury - a_lly) - if subtype == "Link" then - local linktype = annotation.A.S - if linktype == "GoTo" then - link_goto(x,y,w,h,document,annotation,pagedata,namespace) - elseif linktype == "GoToR" then - link_file(x,y,w,h,document,annotation) - elseif linktype == "URI" then - link_uri(x,y,w,h,document,annotation) + if annotation then + local subtype = annotation.Subtype + local rectangle = annotation.Rect + local a_llx, a_lly, a_urx, a_ury = rectangle[1], rectangle[2], rectangle[3], rectangle[4] + local x, y = xscale * (a_llx - llx), yscale * (a_lly - lly) + local w, h = xscale * (a_urx - a_llx), yscale * (a_ury - a_lly) + if subtype == "Link" then + local a = annotation.A + if a then + local linktype = a.S + if linktype == "GoTo" then + link_goto(x,y,w,h,document,annotation,pagedata,namespace) + elseif linktype == "GoToR" then + link_file(x,y,w,h,document,annotation) + elseif linktype == "URI" then + link_uri(x,y,w,h,document,annotation) + elseif trace_links then + report_link("unsupported link annotation %q",linktype) + end + else + report_link("mising link annotation") + end elseif trace_links then - report_link("unsupported link annotation '%s'",linktype) + report_link("unsupported annotation %q",subtype) end elseif trace_links then - report_link("unsupported annotation '%s'",subtype) + report_link("broken annotation, index: 
%i",i) end end context.flushlayer { "epdflinks" } -- context("\\gdef\\figurereference{%s}",reference) -- global context.setgvalue("figurereference",reference) -- global if trace_links then - report_link("setting figure reference to '%s'",reference) + report_link("setting figure reference to %q",reference) end specification.reference = reference return namespace @@ -171,19 +195,24 @@ function codeinjections.mergeviewerlayers(specification) local layers = document.layers if layers then for i=1,layers.n do - local tag = namespace .. gsub(layers[i]," ",":") - local title = tag - if trace_links then - report_link("using layer '%s'",tag) + local layer = layers[i] + if layer then + local tag = namespace .. gsub(layer," ",":") + local title = tag + if trace_links then + report_link("using layer %q",tag) + end + attributes.viewerlayers.define { -- also does some cleaning + tag = tag, -- todo: #3A or so + title = title, + visible = variables.start, + editable = variables.yes, + printable = variables.yes, + } + codeinjections.useviewerlayer(tag) + elseif trace_links then + report_link("broken layer, index: %i",i) end - attributes.viewerlayers.define { -- also does some cleaning - tag = tag, -- todo: #3A or so - title = title, - visible = variables.start, - editable = variables.yes, - printable = variables.yes, - } - codeinjections.useviewerlayer(tag) end end end diff --git a/tex/context/base/lpdf-epd.lua b/tex/context/base/lpdf-epd.lua index 1c4b4b5c5..b9f8cfc7c 100644 --- a/tex/context/base/lpdf-epd.lua +++ b/tex/context/base/lpdf-epd.lua @@ -27,6 +27,8 @@ local lower, match, char, find, sub = string.lower, string.match, string.char, s local concat = table.concat local toutf = string.toutf +local report_epdf = logs.reporter("epdf") + -- a bit of protection local limited = false @@ -59,9 +61,7 @@ local function prepare(document,d,t,n,k) for i=1,n do local v = d:getVal(i) local r = d:getValNF(i) - if r:getTypeName() ~= "ref" then - t[d:getKey(i)] = 
checked_access[v:getTypeName()](v,document) - else + if r:getTypeName() == "ref" then r = r:getRef().num local c = document.cache[r] if c then @@ -74,6 +74,8 @@ local function prepare(document,d,t,n,k) end end t[d:getKey(i)] = c + else + t[d:getKey(i)] = checked_access[v:getTypeName()](v,document) end end getmetatable(t).__index = nil @@ -95,9 +97,9 @@ local function prepare(document,a,t,n,k) for i=1,n do local v = a:get(i) local r = a:getNF(i) - if r:getTypeName() ~= "ref" then - t[i] = checked_access[v:getTypeName()](v,document) - else + if v:getTypeName() == "null" then + -- TH: weird, but appears possible + elseif r:getTypeName() == "ref" then r = r:getRef().num local c = document.cache[r] if c then @@ -108,6 +110,8 @@ local function prepare(document,a,t,n,k) document.xrefs[c] = r end t[i] = c + else + t[i] = checked_access[v:getTypeName()](v,document) end end getmetatable(t).__index = nil @@ -267,10 +271,14 @@ local function getpages(document) for pagenumber=1,nofpages do local pagereference = cata:getPageRef(pagenumber).num local pagedata = some_dictionary(xref:fetch(pagereference,0):getDict(),document,pagereference) - pagedata.number = pagenumber - pages[pagenumber] = pagedata - xrefs[pagedata] = pagereference - cache[pagereference] = pagedata + if pagedata then + pagedata.number = pagenumber + pages[pagenumber] = pagedata + xrefs[pagedata] = pagereference + cache[pagereference] = pagedata + else + report_epdf("missing pagedata at slot %i",i) + end end pages.n = nofpages return pages diff --git a/tex/context/base/lpdf-ini.lua b/tex/context/base/lpdf-ini.lua index 1591d6da1..c1b742949 100644 --- a/tex/context/base/lpdf-ini.lua +++ b/tex/context/base/lpdf-ini.lua @@ -8,8 +8,7 @@ if not modules then modules = { } end modules ['lpdf-ini'] = { local setmetatable, getmetatable, type, next, tostring, tonumber, rawset = setmetatable, getmetatable, type, next, tostring, tonumber, rawset local char, byte, format, gsub, concat, match, sub, gmatch = string.char, 
string.byte, string.format, string.gsub, table.concat, string.match, string.sub, string.gmatch -local utfvalues = string.utfvalues -local utfchar = utf.char +local utfchar, utfvalues = utf.char, utf.values local sind, cosd = math.sind, math.cosd local lpegmatch, P, C, R, S, Cc, Cs = lpeg.match, lpeg.P, lpeg.C, lpeg.R, lpeg.S, lpeg.Cc, lpeg.Cs diff --git a/tex/context/base/lpdf-swf.lua b/tex/context/base/lpdf-swf.lua index 4bbec8dbe..d3c2b41da 100644 --- a/tex/context/base/lpdf-swf.lua +++ b/tex/context/base/lpdf-swf.lua @@ -108,7 +108,7 @@ local function insertswf(spec) local names = configuration.Assets.Names local prefix = false if root ~= "" and root ~= "." then - prefix = format("^%s/",string.escapedpattern(root,true)) + prefix = format("^%s/",string.topattern(root)) end if prefix and trace_swf then report_swf("using strip pattern '%s'",prefix) diff --git a/tex/context/base/luat-bas.mkiv b/tex/context/base/luat-bas.mkiv index 4d6f6d03b..e24568b0a 100644 --- a/tex/context/base/luat-bas.mkiv +++ b/tex/context/base/luat-bas.mkiv @@ -13,10 +13,11 @@ \writestatus{loading}{ConTeXt Lua Macros / Basic Lua Libraries} +\registerctxluafile{l-lua} {1.001} +\registerctxluafile{l-lpeg} {1.001} \registerctxluafile{l-function}{1.001} \registerctxluafile{l-string} {1.001} \registerctxluafile{l-table} {1.001} -\registerctxluafile{l-lpeg} {1.001} \registerctxluafile{l-boolean} {1.001} \registerctxluafile{l-number} {1.001} \registerctxluafile{l-math} {1.001} diff --git a/tex/context/base/luat-cnf.lua b/tex/context/base/luat-cnf.lua index 07a5d51f2..e574bf428 100644 --- a/tex/context/base/luat-cnf.lua +++ b/tex/context/base/luat-cnf.lua @@ -30,22 +30,22 @@ texconfig.param_size = 25000 -- 60 texconfig.save_size = 50000 -- 4000 texconfig.stack_size = 10000 -- 300 ---~ local function initialize() ---~ local t, variable = allocate(), resolvers.variable ---~ for name, default in next, variablenames do ---~ local name = variablenames[i] ---~ local value = variable(name) ---~ value = 
tonumber(value) ---~ if not value or value == "" or value == 0 then ---~ value = default ---~ end ---~ texconfig[name], t[name] = value, value ---~ end ---~ initialize = nil ---~ return t ---~ end - ---~ luatex.variables = initialize() +-- local function initialize() +-- local t, variable = allocate(), resolvers.variable +-- for name, default in next, variablenames do +-- local name = variablenames[i] +-- local value = variable(name) +-- value = tonumber(value) +-- if not value or value == "" or value == 0 then +-- value = default +-- end +-- texconfig[name], t[name] = value, value +-- end +-- initialize = nil +-- return t +-- end +-- +-- luatex.variables = initialize() local stub = [[ @@ -71,9 +71,9 @@ function texconfig.init() local builtin, globals = { }, { } - libraries = { -- we set it her as we want libraries also 'indexed' + libraries = { -- we set it here as we want libraries also 'indexed' basiclua = { - "string", "table", "coroutine", "debug", "file", "io", "lpeg", "math", "os", "package", + "string", "table", "coroutine", "debug", "file", "io", "lpeg", "math", "os", "package", "bit32", }, basictex = { -- noad "callback", "font", "img", "lang", "lua", "node", "pdf", "status", "tex", "texconfig", "texio", "token", @@ -88,6 +88,14 @@ function texconfig.init() "fontforge", -- can be filled by luat-log "kpse", }, + functions = { + "assert", "pcall", "xpcall", "error", "collectgarbage", + "dofile", "load","loadfile", "require", "module", + "getmetatable", "setmetatable", + "ipairs", "pairs", "rawequal", "rawget", "rawset", "next", + "tonumber", "tostring", + "type", "unpack", "select", "print", + }, builtin = builtin, -- to be filled globals = globals, -- to be filled } @@ -96,27 +104,33 @@ function texconfig.init() globals[k] = tostring(v) end - local function collect(t) + local function collect(t,fnc) local lib = { } for k, v in next, t do - local keys = { } - local gv = _G[v] - if type(gv) == "table" then - for k, v in next, gv do - keys[k] = tostring(v) -- 
true -- by tostring we cannot call overloades functions (security) + if fnc then + lib[v] = _G[v] + else + local keys = { } + local gv = _G[v] + local tv = type(gv) + if tv == "table" then + for k, v in next, gv do + keys[k] = tostring(v) -- true -- by tostring we cannot call overloades functions (security) + end end + lib[v] = keys + builtin[v] = keys end - lib[v] = keys - builtin[v] = keys end return lib end - libraries.basiclua = collect(libraries.basiclua) - libraries.basictex = collect(libraries.basictex) - libraries.extralua = collect(libraries.extralua) - libraries.extratex = collect(libraries.extratex) - libraries.obsolete = collect(libraries.obsolete) + libraries.basiclua = collect(libraries.basiclua) + libraries.basictex = collect(libraries.basictex) + libraries.extralua = collect(libraries.extralua) + libraries.extratex = collect(libraries.extratex) + libraries.functions = collect(libraries.functions,true) + libraries.obsolete = collect(libraries.obsolete) -- shortcut and helper @@ -150,7 +164,6 @@ end) -- done, from now on input and callbacks are internal ]] - local variablenames = { "error_line", "half_error_line", "expand_depth", "hash_extra", "nest_size", diff --git a/tex/context/base/luat-cod.mkiv b/tex/context/base/luat-cod.mkiv index beb631844..9ce6161c3 100644 --- a/tex/context/base/luat-cod.mkiv +++ b/tex/context/base/luat-cod.mkiv @@ -67,7 +67,7 @@ %D Reporting the version of \LUA\ that we use is done as follows: -\edef\luaversion{\ctxwrite{_VERSION}} % no context luaded yet +\edef\luaversion{\ctxwrite{_VERSION}} \def\registerctxluafile#1#2{\ctxlua{lua.registercode("#1","#2")}} \def\ctxloadluafile #1{\ctxlua{lua.registercode("#1")}} diff --git a/tex/context/base/luat-env.lua b/tex/context/base/luat-env.lua index 4d58897a0..7bce0052d 100644 --- a/tex/context/base/luat-env.lua +++ b/tex/context/base/luat-env.lua @@ -22,6 +22,10 @@ local format, sub, match, gsub, find = string.format, string.sub, string.match, local unquoted, quoted = 
string.unquoted, string.quoted local concat, insert, remove = table.concat, table.insert, table.remove local loadedluacode = utilities.lua.loadedluacode +local luasuffixes = utilities.lua.suffixes + +environment = environment or { } +local environment = environment -- precautions @@ -31,9 +35,29 @@ function os.setlocale() -- no way you can mess with it end --- dirty tricks +-- dirty tricks (we will replace the texlua call by luatex --luaonly) + +local validengines = allocate { + ["luatex"] = true, + ["luajittex"] = true, + -- ["luatex.exe"] = true, + -- ["luajittex.exe"] = true, +} + +local basicengines = allocate { + ["luatex"] = "luatex", + ["texlua"] = "luatex", + ["texluac"] = "luatex", + ["luajittex"] = "luajittex", + ["texluajit"] = "luajittex", + -- ["texlua.exe"] = "luatex", + -- ["texluajit.exe"] = "luajittex", +} -if arg and (arg[0] == 'luatex' or arg[0] == 'luatex.exe') and arg[1] == "--luaonly" then +environment.validengines = validengines +environment.basicengines = basicengines + +if arg and validengines[file.removesuffix(arg[0])] and arg[1] == "--luaonly" then arg[-1] = arg[0] arg[ 0] = arg[2] for k=3,#arg do @@ -65,9 +89,6 @@ end -- environment -environment = environment or { } -local environment = environment - environment.arguments = allocate() environment.files = allocate() environment.sortedflags = nil @@ -123,7 +144,7 @@ function environment.initializearguments(arg) end end end - environment.ownname = environment.ownname or arg[0] or 'unknown.lua' + environment.ownname = file.reslash(environment.ownname or arg[0] or 'unknown.lua') end function environment.setargument(name,value) @@ -204,22 +225,22 @@ function environment.reconstructcommandline(arg,noquote) end end ---~ -- to be tested: ---~ ---~ function environment.reconstructcommandline(arg,noquote) ---~ arg = arg or environment.originalarguments ---~ if noquote and #arg == 1 then ---~ return unquoted(resolvers.resolve(arg[1])) ---~ elseif #arg > 0 then ---~ local result = { } ---~ for 
i=1,#arg do ---~ result[#result+1] = format("%q",unquoted(resolvers.resolve(arg[i]))) -- always quote ---~ end ---~ return concat(result," ") ---~ else ---~ return "" ---~ end ---~ end +-- -- to be tested: +-- +-- function environment.reconstructcommandline(arg,noquote) +-- arg = arg or environment.originalarguments +-- if noquote and #arg == 1 then +-- return unquoted(resolvers.resolve(arg[1])) +-- elseif #arg > 0 then +-- local result = { } +-- for i=1,#arg do +-- result[#result+1] = format("%q",unquoted(resolvers.resolve(arg[i]))) -- always quote +-- end +-- return concat(result," ") +-- else +-- return "" +-- end +-- end if arg then @@ -314,9 +335,11 @@ function environment.loadluafile(filename, version) local lucname, luaname, chunk local basename = file.removesuffix(filename) if basename == filename then - lucname, luaname = basename .. ".luc", basename .. ".lua" + luaname = fiule.addsuffix(basename,luasuffixes.lua) + lucname = fiule.addsuffix(basename,luasuffixes.luc) else - lucname, luaname = nil, basename -- forced suffix + luaname = basename -- forced suffix + lucname = nil end -- when not overloaded by explicit suffix we look for a luc file first local fullname = (lucname and environment.luafile(lucname)) or "" diff --git a/tex/context/base/luat-exe.lua b/tex/context/base/luat-exe.lua index 0d5cb5abc..a57a5a006 100644 --- a/tex/context/base/luat-exe.lua +++ b/tex/context/base/luat-exe.lua @@ -10,6 +10,7 @@ if not modules then modules = { } end modules ['luat-exe'] = { local match, find, gmatch = string.match, string.find, string.gmatch local concat = table.concat +local select = select local report_executers = logs.reporter("system","executers") @@ -29,20 +30,20 @@ local spawn = osspawn local popen = iopopen local function register(...) - local t = { ... } - for k=1,#t do - local v = t[k] - permitted[#permitted+1] = (v == "*" and ".*") or v + for k=1,select("#",...) do + local v = select(k,...) 
+ permitted[#permitted+1] = v == "*" and ".*" or v end end local function prepare(...) -- todo: make more clever first split local t = { ... } + local n = #n local one = t[1] - if #t == 1 then + if n == 1 then if type(one) == 'table' then - return one, concat(t," ",2,#t) + return one, concat(t," ",2,n) else local name, arguments = match(one,"^(.-)%s+(.+)$") if name and arguments then @@ -52,7 +53,7 @@ local function prepare(...) end end else - return one, concat(t," ",2,#t) + return one, concat(t," ",2,n) end end diff --git a/tex/context/base/luat-fio.lua b/tex/context/base/luat-fio.lua index 8e7988c4e..0af9cb6fc 100644 --- a/tex/context/base/luat-fio.lua +++ b/tex/context/base/luat-fio.lua @@ -81,6 +81,9 @@ if not resolvers.instance then register('find_write_file' , function(id,name) return name end, true) register('find_format_file' , function(name) return name end, true) + register('find_cidmap_file' , function(name) return findbinfile(name,"cidmap") end, true) + -- register('read_cidmap_file' , function(file) return loadbinfile(file,"cidmap") end, true) + end end diff --git a/tex/context/base/luat-fmt.lua b/tex/context/base/luat-fmt.lua index 37b0f5166..2d2614ecb 100644 --- a/tex/context/base/luat-fmt.lua +++ b/tex/context/base/luat-fmt.lua @@ -8,13 +8,11 @@ if not modules then modules = { } end modules ['luat-fmt'] = { local format = string.format +local quoted = string.quoted +local luasuffixes = utilities.lua.suffixes local report_format = logs.reporter("resolvers","formats") --- helper for mtxrun - -local quoted = string.quoted - local function primaryflags() -- not yet ok local trackers = environment.argument("trackers") local directives = environment.argument("directives") @@ -29,13 +27,14 @@ local function primaryflags() -- not yet ok end function environment.make_format(name) + local engine = environment.ownmain or "luatex" -- change to format path (early as we need expanded paths) - local olddir = lfs.currentdir() - local path = 
caches.getwritablepath("formats") or "" -- maybe platform + local olddir = dir.current() + local path = caches.getwritablepath("formats",engine) or "" -- maybe platform if path ~= "" then lfs.chdir(path) end - report_format("format path: %s",lfs.currentdir()) + report_format("format path: %s",dir.current()) -- check source file local texsourcename = file.addsuffix(name,"mkiv") local fulltexsourcename = resolvers.findfile(texsourcename,"tex") or "" @@ -72,13 +71,12 @@ function environment.make_format(name) elseif type(usedlualibs) == "table" then report_format("using stub specification: %s",fullspecificationname) local texbasename = file.basename(name) - local luastubname = file.addsuffix(texbasename,"lua") - local lucstubname = file.addsuffix(texbasename,"luc") + local luastubname = file.addsuffix(texbasename,luasuffixes.lua) + local lucstubname = file.addsuffix(texbasename,luasuffixes.luc) -- pack libraries in stub report_format("creating initialization file: %s",luastubname) utilities.merger.selfcreate(usedlualibs,specificationpath,luastubname) -- compile stub file (does not save that much as we don't use this stub at startup any more) - local strip = resolvers.booleanvariable("LUACSTRIP", true) if utilities.lua.compile(luastubname,lucstubname) and lfs.isfile(lucstubname) then report_format("using compiled initialization file: %s",lucstubname) usedluastub = lucstubname @@ -92,7 +90,7 @@ function environment.make_format(name) return end -- generate format - local command = format("luatex --ini %s --lua=%s %s %sdump",primaryflags(),quoted(usedluastub),quoted(fulltexsourcename),os.platform == "unix" and "\\\\" or "\\") + local command = format("%s --ini %s --lua=%s %s %sdump",engine,primaryflags(),quoted(usedluastub),quoted(fulltexsourcename),os.platform == "unix" and "\\\\" or "\\") report_format("running command: %s\n",command) os.spawn(command) -- remove related mem files @@ -111,8 +109,9 @@ end function environment.run_format(name,data,more) if name and name ~= 
"" then + local engine = environment.ownmain or "luatex" local barename = file.removesuffix(name) - local fmtname = caches.getfirstreadablefile(file.addsuffix(barename,"fmt"),"formats") + local fmtname = caches.getfirstreadablefile(file.addsuffix(barename,"fmt"),"formats",engine) if fmtname == "" then fmtname = resolvers.findfile(file.addsuffix(barename,"fmt")) or "" end @@ -129,7 +128,7 @@ function environment.run_format(name,data,more) report_format("using format name: %s",fmtname) report_format("no luc/lua with name: %s",barename) else - local command = format("luatex %s --fmt=%s --lua=%s %s %s",primaryflags(),quoted(barename),quoted(luaname),quoted(data),more ~= "" and quoted(more) or "") + local command = format("%s %s --fmt=%s --lua=%s %s %s",engine,primaryflags(),quoted(barename),quoted(luaname),quoted(data),more ~= "" and quoted(more) or "") report_format("running command: %s",command) os.spawn(command) end diff --git a/tex/context/base/luat-ini.lua b/tex/context/base/luat-ini.lua index 0b8c5d951..587214b93 100644 --- a/tex/context/base/luat-ini.lua +++ b/tex/context/base/luat-ini.lua @@ -6,15 +6,16 @@ if not modules then modules = { } end modules ['luat-ini'] = { license = "see context related readme files" } --- rather experimental down here ... will change with lua 5.2 -- +-- rather experimental down here ... adapted to lua 5.2 ... but still +-- experimental local debug = require("debug") local string, table, lpeg, math, io, system = string, table, lpeg, math, io, system -local next, setfenv = next, setfenv or debug.setfenv +local rawset, rawget, next, setmetatable = rawset, rawget, next, setmetatable --[[ldx-- -We cannot load anything yet. However what we will do us reserve a fewtables. +
We cannot load anything yet. However what we will do is reserve a few tables. These can be used for runtime user data or third party modules and will not be cluttered by macro package code.
--ldx]]-- @@ -38,115 +39,168 @@ thirddata['tricks' ] = { } --ldx]]-- --[[ldx-- -We could cook up a readonly model for global tables but it -makes more sense to invite users to use one of the predefined -namespaces. One can redefine the protector. After all, it's -just a lightweight suggestive system, not a watertight -one.
+We could cook up a readonly model for global tables but it makes more sense +to invite users to use one of the predefined namespaces. One can redefine the +protector. After all, it's just a lightweight suggestive system, not a +watertight one.
--ldx]]-- --- this will change when we move on to lua 5.2+ - -local global = _G - +local global = _G global.global = global ---~ rawset(global,"global",global) local dummy = function() end --- another approach is to freeze tables by using a metatable, this will be --- implemented stepwise - -local protected = { - -- global table - global = global, - -- user tables - -- moduledata = moduledata, - userdata = userdata, - thirddata = thirddata, - documentdata = documentdata, - -- reserved - protect = dummy, - unprotect = dummy, - -- luatex - tex = tex, - -- lua - string = string, - table = table, - lpeg = lpeg, - math = math, - io = io, - -- - -- maybe other l-*, xml etc -} +--[[ldx-- +Another approach is to freeze tables by using a metatable, this will be +implemented stepwise.
+--ldx]]-- -- moduledata : no need for protection (only for developers) -- isolatedata : full protection -- userdata : protected -- thirddata : protected -userdata, thirddata = nil, nil - --- we could have a metatable that automaticaly creates a top level namespace +--[[ldx-- +We could have a metatable that automatically creates a top level namespace.
+--ldx]]-- -if not setfenv then - texio.write_nl("warning: we need to fix setfenv by using 'load in' or '_ENV'") -end +local luanames = lua.name -- luatex itself + +lua.numbers = lua.numbers or { } local numbers = lua.numbers +lua.messages = lua.messages or { } local messages = lua.messages + +storage.register("lua/numbers", numbers, "lua.numbers" ) +storage.register("lua/messages", messages, "lua.messages") + +local setfenv = setfenv or debug.setfenv -- < 5.2 + +if setfenv then + + local protected = { + -- global table + global = global, + -- user tables + -- moduledata = moduledata, + userdata = userdata, + thirddata = thirddata, + documentdata = documentdata, + -- reserved + protect = dummy, + unprotect = dummy, + -- luatex + tex = tex, + -- lua + string = string, + table = table, + lpeg = lpeg, + math = math, + io = io, + file = file, + bit32 = bit32, + -- + context = context, + } + + local protect_full = function(name) + local t = { } + for k, v in next, protected do + t[k] = v + end + return t + end -local function protect_full(name) - local t = { } - for k, v in next, protected do - t[k] = v + local protect_part = function(name) -- adds + local t = rawget(global,name) + if not t then + t = { } + for k, v in next, protected do + t[k] = v + end + rawset(global,name,t) + end + return t end - return t -end -local function protect_part(name) ---~ local t = global[name] - local t = rawget(global,name) - if not t then - t = { } - for k, v in next, protected do - t[k] = v + protect = function(name) + if name == "isolateddata" then + setfenv(2,protect_full(name)) + else + setfenv(2,protect_part(name or "shareddata")) end ---~ global[name] = t - rawset(global,name,t) end - return t -end -function protect(name) - if name == "isolateddata" then - setfenv(2,protect_full(name)) - else - setfenv(2,protect_part(name or "shareddata")) + function lua.registername(name,message) + local lnn = lua.numbers[name] + if not lnn then + lnn = #messages + 1 + messages[lnn] = message + 
numbers[name] = lnn + end + luanames[lnn] = message + context(lnn) + -- initialize once + if name ~= "isolateddata" then + protect_full(name or "shareddata") + end end -end -lua.numbers = { } -lua.messages = { } +elseif libraries then -- assume >= 5.2 + + local shared + + protect = function(name) + if not shared then + -- e.g. context is not yet known + local public = { + global = global, + -- moduledata = moduledata, + userdata = userdata, + thirddata = thirddata, + documentdata = documentdata, + protect = dummy, + unprotect = dummy, + context = context, + } + -- + for k, v in next, libraries.builtin do public[k] = v end + for k, v in next, libraries.functions do public[k] = v end + for k, v in next, libraries.obsolete do public[k] = nil end + -- + shared = { __index = public } + protect = function(name) + local t = global[name] or { } + setmetatable(t,shared) -- set each time + return t + end + end + return protect(name) + end -function lua.registername(name,message) - local lnn = lua.numbers[name] - if not lnn then - lnn = #lua.messages + 1 - lua.messages[lnn] = message - lua.numbers[name] = lnn + function lua.registername(name,message) + local lnn = lua.numbers[name] + if not lnn then + lnn = #messages + 1 + messages[lnn] = message + numbers[name] = lnn + end + luanames[lnn] = message + context(lnn) end - lua.name[lnn] = message - context(lnn) - -- initialize once - if name ~= "isolateddata" then - protect_full(name or "shareddata") + +else + + protect = dummy + + function lua.registername(name,message) + local lnn = lua.numbers[name] + if not lnn then + lnn = #messages + 1 + messages[lnn] = message + numbers[name] = lnn + end + luanames[lnn] = message + context(lnn) end -end ---~ function lua.checknames() ---~ lua.name[0] = "ctx" ---~ for k, v in next, lua.messages do ---~ lua.name[k] = v ---~ end ---~ end +end -storage.register("lua/numbers", lua.numbers, "lua.numbers") -storage.register("lua/messages", lua.messages, "lua.messages") diff --git 
a/tex/context/base/luat-ini.mkiv b/tex/context/base/luat-ini.mkiv index 8ce5e80fd..a3a590311 100644 --- a/tex/context/base/luat-ini.mkiv +++ b/tex/context/base/luat-ini.mkiv @@ -27,14 +27,14 @@ %D A few more goodies: -\unexpanded\def\startlua {\luat_start_lua } \let\stoplua \relax % tex catcodes -\unexpanded\def\startluacode{\luat_start_lua_code} \let\stopluacode\relax % lua catcodes +\let\stoplua \relax % tex catcodes +\let\stopluacode\relax % lua catcodes % It might makes sense to have a \type {\directelua} so that we can avoid % the \type {\normalexpanded} around \type {\directlua}. Something to discuss % in the team. -\def\luat_start_lua +\unexpanded\def\startlua % \stoplua {\begingroup \obeylualines \luat_start_lua_indeed} @@ -42,7 +42,7 @@ \def\luat_start_lua_indeed#1\stoplua {\normalexpanded{\endgroup\noexpand\directlua{#1}}} % \zerocount is default -\def\luat_start_lua_code +\unexpanded\def\startluacode % \stopluacode {\begingroup \obeylualines \obeyluatokens @@ -62,42 +62,46 @@ \newtoks\everyluacode -\edef\lualetterbackslash{\string\\} -\edef\lualetterbar {\string\|} \edef\lualetterdash {\string\-} -\edef\lualetterlparent {\string\(} \edef\lualetterrparent {\string\)} -\edef\lualetterlbrace {\string\{} \edef\lualetterrbrace {\string\}} -\edef\lualettersquote {\string\'} \edef\lualetterdquote {\string\"} -\edef\lualettern {\string\n} \edef\lualetterr {\string\r} -\edef\lualetterf {\string\f} \edef\lualettert {\string\t} -\edef\lualettera {\string\a} \edef\lualetterb {\string\b} -\edef\lualetterv {\string\v} \edef\lualetters {\string\s} -\edef\lualetterone {\string\1} \edef\lualettertwo {\string\2} -\edef\lualetterthree {\string\3} \edef\lualetterfour {\string\4} -\edef\lualetterfive {\string\5} \edef\lualettersix {\string\6} -\edef\lualetterseven {\string\7} \edef\lualettereight {\string\8} -\edef\lualetternine {\string\9} \edef\lualetterzero {\string\0} +\edef\lua_letter_backslash{\string\\} +\edef\lua_letter_bar {\string\|} \edef\lua_letter_dash 
{\string\-} +\edef\lua_letter_lparent {\string\(} \edef\lua_letter_rparent {\string\)} +\edef\lua_letter_lbrace {\string\{} \edef\lua_letter_rbrace {\string\}} +\edef\lua_letter_squote {\string\'} \edef\lua_letter_dquote {\string\"} +\edef\lua_letter_n {\string\n} \edef\lua_letter_r {\string\r} +\edef\lua_letter_f {\string\f} \edef\lua_letter_t {\string\t} +\edef\lua_letter_a {\string\a} \edef\lua_letter_b {\string\b} +\edef\lua_letter_v {\string\v} \edef\lua_letter_s {\string\s} +\edef\lua_letter_one {\string\1} \edef\lua_letter_two {\string\2} +\edef\lua_letter_three {\string\3} \edef\lua_letter_four {\string\4} +\edef\lua_letter_five {\string\5} \edef\lua_letter_six {\string\6} +\edef\lua_letter_seven {\string\7} \edef\lua_letter_eight {\string\8} +\edef\lua_letter_nine {\string\9} \edef\lua_letter_zero {\string\0} \appendtoks - \let\\\lualetterbackslash - \let\|\lualetterbar \let\-\lualetterdash - \let\(\lualetterlparent \let\)\lualetterrparent - \let\{\lualetterlbrace \let\}\lualetterrbrace - \let\'\lualettersquote \let\"\lualetterdquote - \let\n\lualettern \let\r\lualetterr - \let\f\lualetterf \let\t\lualettert - \let\a\lualettera \let\b\lualetterb - \let\v\lualetterv \let\s\lualetters - \let\1\lualetterone \let\2\lualettertwo - \let\3\lualetterthree \let\4\lualetterfour - \let\5\lualetterfive \let\6\lualettersix - \let\7\lualetterseven \let\8\lualettereight - \let\9\lualetternine \let\0\lualetterzero + \let\\\lua_letter_backslash + \let\|\lua_letter_bar \let\-\lua_letter_dash + \let\(\lua_letter_lparent \let\)\lua_letter_rparent + \let\{\lua_letter_lbrace \let\}\lua_letter_rbrace + \let\'\lua_letter_squote \let\"\lua_letter_dquote + \let\n\lua_letter_n \let\r\lua_letter_r + \let\f\lua_letter_f \let\t\lua_letter_t + \let\a\lua_letter_a \let\b\lua_letter_b + \let\v\lua_letter_v \let\s\lua_letter_s + \let\1\lua_letter_one \let\2\lua_letter_two + \let\3\lua_letter_three \let\4\lua_letter_four + \let\5\lua_letter_five \let\6\lua_letter_six + 
\let\7\lua_letter_seven \let\8\lua_letter_eight + \let\9\lua_letter_nine \let\0\lua_letter_zero \to \everyluacode -\def\obeyluatokens +\unexpanded\def\obeyluatokens {\setcatcodetable\luacatcodes \the\everyluacode} + +\edef\luamajorversion{\ctxwrite{_MINORVERSION}} +\edef\luaminorversion{\ctxwrite{_MAJORVERSION}} + %D \macros %D {definenamedlua} %D @@ -110,19 +114,20 @@ \installcorenamespace{luacode} -\def\luat_start_named_lua_code#1% +\unexpanded\def\luat_start_named_lua_code#1% {\begingroup \obeylualines \obeyluatokens \csname\??luacode#1\endcsname} -\unexpanded\def\definenamedlua[#1]#2[#3]% no optional arg handling here yet +\unexpanded\def\definenamedlua[#1]#2[#3]% no optional arg handling here yet / we could use numbers instead (more efficient) {\ifcsname\??luacode#1\endcsname \else \scratchcounter\ctxlua{lua.registername("#1","#3")}% - \normalexpanded{\edef\csname\??luacode#1\endcsname##1\csname\e!stop#1\v!code\endcsname}% - {\endgroup\noexpand\directlua\the\scratchcounter{protect("#1\s!data")##1}}% - \expandafter\def \csname\e!start#1\v!code\endcsname {\luat_start_named_lua_code{#1}}% - \expandafter\edef\csname #1\v!code\endcsname##1{\noexpand\directlua\the\scratchcounter{protect("#1\s!data")##1}}% + \normalexpanded{\xdef\csname\??luacode#1\endcsname##1\csname\e!stop#1\v!code\endcsname}% + %{\endgroup\noexpand\directlua\the\scratchcounter{local _ENV=protect("#1\s!data")##1}}% + {\noexpand\normalexpanded{\endgroup\noexpand\directlua\the\scratchcounter{local _ENV=protect("#1\s!data")##1}}}% + \expandafter\edef\csname\e!start#1\v!code\endcsname {\luat_start_named_lua_code{#1}}% + \expandafter\edef\csname #1\v!code\endcsname##1{\noexpand\directlua\the\scratchcounter{local _ENV=protect("#1\s!data")##1}}% \fi} %D We predefine a few. 
@@ -142,12 +147,25 @@ %D %D \startusercode %D global.context("USER 1") +%D context.par() %D context("USER 2") +%D context.par() %D if characters then -%D context("ACCESS") +%D context("ACCESS directly") +%D elseif global.characters then +%D context("ACCESS via global") +%D else +%D context("NO ACCESS at all") +%D end +%D context.par() +%D if bogus then +%D context("ACCESS directly") +%D elseif global.bogus then +%D context("ACCESS via global") %D else -%D context("NO ACCESS") +%D context("NO ACCESS at all") %D end +%D context.par() %D \stopusercode %D \stopbuffer %D diff --git a/tex/context/base/lxml-ent.lua b/tex/context/base/lxml-ent.lua index be69dec00..14b758f1c 100644 --- a/tex/context/base/lxml-ent.lua +++ b/tex/context/base/lxml-ent.lua @@ -7,9 +7,8 @@ if not modules then modules = { } end modules ['lxml-ent'] = { } local type, next, tonumber = type, next, tonumber -local utf = unicode.utf8 local byte, format = string.byte, string.format -local utfupper, utfchar = utf.upper, utf.char +local utfchar = utf.char local lpegmatch = lpeg.match --[[ldx-- diff --git a/tex/context/base/lxml-inf.lua b/tex/context/base/lxml-inf.lua index 834d152fd..8f1157c7d 100644 --- a/tex/context/base/lxml-inf.lua +++ b/tex/context/base/lxml-inf.lua @@ -8,6 +8,8 @@ if not modules then modules = { } end modules ['lxml-inf'] = { -- This file will be loaded runtime by x-pending.tex. 
+local concat = table.concat + local xmlwithelements = xml.withelements local getid = lxml.getid diff --git a/tex/context/base/lxml-lpt.lua b/tex/context/base/lxml-lpt.lua index 0c10998a0..4e84930f1 100644 --- a/tex/context/base/lxml-lpt.lua +++ b/tex/context/base/lxml-lpt.lua @@ -10,7 +10,7 @@ if not modules then modules = { } end modules ['lxml-lpt'] = { -- todo: B/C/[get first match] local concat, remove, insert = table.concat, table.remove, table.insert -local type, next, tonumber, tostring, setmetatable, loadstring = type, next, tonumber, tostring, setmetatable, loadstring +local type, next, tonumber, tostring, setmetatable, load, select = type, next, tonumber, tostring, setmetatable, load, select local format, upper, lower, gmatch, gsub, find, rep = string.format, string.upper, string.lower, string.gmatch, string.gsub, string.find, string.rep local lpegmatch, lpegpatterns = lpeg.match, lpeg.patterns @@ -682,7 +682,7 @@ end local function register_expression(expression) local converted = lpegmatch(converter,expression) - local runner = loadstring(format(template_e,converted)) + local runner = load(format(template_e,converted)) runner = (runner and runner()) or function() errorrunner_e(expression,converted) end return { kind = "expression", expression = expression, converted = converted, evaluator = runner } end @@ -690,9 +690,9 @@ end local function register_finalizer(protocol,name,arguments) local runner if arguments and arguments ~= "" then - runner = loadstring(format(template_f_y,protocol or xml.defaultprotocol,name,arguments)) + runner = load(format(template_f_y,protocol or xml.defaultprotocol,name,arguments)) else - runner = loadstring(format(template_f_n,protocol or xml.defaultprotocol,name)) + runner = load(format(template_f_n,protocol or xml.defaultprotocol,name)) end runner = (runner and runner()) or function() errorrunner_f(name,arguments) end return { kind = "finalizer", name = name, arguments = arguments, finalizer = runner } @@ -1116,6 +1116,7 @@ 
end expressions.child = function(e,pattern) return applylpath(e,pattern) -- todo: cache end + expressions.count = function(e,pattern) -- what if pattern == empty or nil local collected = applylpath(e,pattern) -- todo: cache return pattern and (collected and #collected) or 0 @@ -1123,13 +1124,30 @@ end -- external -expressions.oneof = function(s,...) -- slow - local t = {...} for i=1,#t do if s == t[i] then return true end end return false +-- expressions.oneof = function(s,...) +-- local t = {...} +-- for i=1,#t do +-- if s == t[i] then +-- return true +-- end +-- end +-- return false +-- end + +expressions.oneof = function(s,...) + for i=1,select("#",...) do + if s == select(i,...) then + return true + end + end + return false end + expressions.error = function(str) xml.errorhandler(format("unknown function in lpath expression: %s",tostring(str or "?"))) return false end + expressions.undefined = function(s) return s == nil end diff --git a/tex/context/base/lxml-tab.lua b/tex/context/base/lxml-tab.lua index 21c7561e8..04b4192c0 100644 --- a/tex/context/base/lxml-tab.lua +++ b/tex/context/base/lxml-tab.lua @@ -39,7 +39,6 @@ local xml = xml --~ local xml = xml -local utf = unicode.utf8 local concat, remove, insert = table.concat, table.remove, table.insert local type, next, setmetatable, getmetatable, tonumber = type, next, setmetatable, getmetatable, tonumber local format, lower, find, match, gsub = string.format, string.lower, string.find, string.match, string.gsub diff --git a/tex/context/base/lxml-tex.lua b/tex/context/base/lxml-tex.lua index 6ff96bf05..8a57ed54e 100644 --- a/tex/context/base/lxml-tex.lua +++ b/tex/context/base/lxml-tex.lua @@ -10,12 +10,10 @@ if not modules then modules = { } end modules ['lxml-tex'] = { -- interface and not the context one. If we ever do that there will -- be an cldf-xml helper library. 
-local utf = unicode.utf8 - -local utfchar, utfupper = utf.char, utf.upper +local utfchar = utf.char local concat, insert, remove = table.concat, table.insert, table.remove local format, sub, gsub, find, gmatch, match = string.format, string.sub, string.gsub, string.find, string.gmatch, string.match -local type, next, tonumber, tostring = type, next, tonumber, tostring +local type, next, tonumber, tostring, select = type, next, tonumber, tostring, select local lpegmatch = lpeg.match local P, S, C, Cc = lpeg.P, lpeg.S, lpeg.C, lpeg.Cc @@ -125,7 +123,7 @@ function lxml.resolvedentity(str) end context(err) else - local tag = utfupper(str) + local tag = upperchars(str) if trace_entities then report_xml("passing entity '%s' to \\xmle using tag '%s'",str,tag) end @@ -841,10 +839,32 @@ function lxml.installsetup(what,document,setup,where) end end +-- function lxml.flushsetups(id,...) +-- local done, list = { }, { ... } +-- for i=1,#list do +-- local document = list[i] +-- local sd = setups[document] +-- if sd then +-- for k=1,#sd do +-- local v= sd[k] +-- if not done[v] then +-- if trace_loading then +-- report_lxml("applying setup %02i = %s to %s",k,v,document) +-- end +-- contextsprint(ctxcatcodes,"\\xmlsetup{",id,"}{",v,"}") +-- done[v] = true +-- end +-- end +-- elseif trace_loading then +-- report_lxml("no setups for %s",document) +-- end +-- end +-- end + function lxml.flushsetups(id,...) - local done, list = { }, { ... } - for i=1,#list do - local document = list[i] + local done = { } + for i=1,select("#",...) do + local document = select(i,...) 
local sd = setups[document] if sd then for k=1,#sd do diff --git a/tex/context/base/m-database.lua b/tex/context/base/m-database.lua index b9ec3aa36..caa40f8f6 100644 --- a/tex/context/base/m-database.lua +++ b/tex/context/base/m-database.lua @@ -20,12 +20,16 @@ local report_database = logs.reporter("database") buffers.database = buffers.database or { } +local l_tab = lpegpatterns.tab +local l_space = lpegpatterns.space +local l_comma = lpegpatterns.comma + local separators = { -- not interfaced - tab = lpegpatterns.tab, - tabs = lpegpatterns.tab^1, - comma = lpegpatterns.comma, - space = lpegpatterns.space, - spaces = lpegpatterns.space^1, + tab = l_tab, + tabs = l_tab^1, + comma = l_comma, + space = l_space, + spaces = l_space^1, } function buffers.database.process(settings) @@ -54,7 +58,7 @@ function buffers.database.process(settings) local quotedata = nil for chr in gmatch(quotechar,".") do local quotechar = lpegP(chr) - local quoteword = quotechar * lpeg.C((1 - quotechar)^0) * quotechar + local quoteword = l_space^0 * quotechar * lpegC((1 - quotechar)^0) * quotechar * l_space^0 if quotedata then quotedata = quotedata + quoteword else diff --git a/tex/context/base/m-morse.mkvi b/tex/context/base/m-morse.mkvi index 6082d588a..a2c20dff7 100644 --- a/tex/context/base/m-morse.mkvi +++ b/tex/context/base/m-morse.mkvi @@ -100,16 +100,22 @@ local codes = { morse.codes = codes -setmetatable(codes, { __index = function(t,k) +local fallbackself = false + +local function codefallback(t,k) if k then local u = ucchars[k] local v = rawget(t,u) or rawget(t,shchars[u]) or false t[k] = v return v + elseif fallbackself then + return k else return false end -end }) +end + +table.setmetatableindex(codes,codefallback) local MorseBetweenWords = context.MorseBetweenWords local MorseBetweenCharacters = context.MorseBetweenCharacters @@ -228,16 +234,19 @@ end \def\MorseSpace {\hskip7\dimexpr\MorseWidth\relax} \def\MorseUnknown #text{[\detokenize{#text}]} -\unexpanded\def\MorseCode 
#text{\ctxlua{moduledata.morse.tomorse(\!!bs#text\!!es,true)}} -\unexpanded\def\MorseString#text{\ctxlua{moduledata.morse.tomorse(\!!bs#text\!!es)}} -\unexpanded\def\MorseFile #text{\ctxlua{moduledata.morse.filetomorse("#text")}} -\unexpanded\def\MorseTable {\ctxlua{moduledata.morse.showtable()}} +\unexpanded\def\MorseCode #text{\ctxlua{moduledata.morse.tomorse(\!!bs#text\!!es,true)}} +\unexpanded\def\MorseString #text{\ctxlua{moduledata.morse.tomorse(\!!bs#text\!!es)}} +\unexpanded\def\MorseFile #text{\ctxlua{moduledata.morse.filetomorse("#text")}} +\unexpanded\def\MorseTable {\ctxlua{moduledata.morse.showtable()}} \let\Morse \MorseString +%def\MorseShort {·} +%def\MorseLong {—} + \protect -\continueifinputfile{m-morse.mkiv} +\continueifinputfile{m-morse.mkvi} \starttext diff --git a/tex/context/base/m-timing.mkiv b/tex/context/base/m-timing.mkiv index 18646cfae..5502768f6 100644 --- a/tex/context/base/m-timing.mkiv +++ b/tex/context/base/m-timing.mkiv @@ -11,7 +11,7 @@ %C therefore copyrighted by \PRAGMA. See mreadme.pdf for %C details. -\ifx\ShowNamedUsage\undefined \else \endinput \fi +\ifdefined\ShowNamedUsage \endinput \fi %D Written at the end of 2007, this module is dedicated to Taco. Reaching this %D point in \LUATEX\ was a non trivial effort. 
By visualizing a bit what happens @@ -55,12 +55,13 @@ end \stopnotmode \unexpanded\def\ShowNamedUsage#1#2#3% - {\setbox\scratchbox\vbox\bgroup\startMPcode + {\setbox\scratchbox\vbox\bgroup + \startMPcode begingroup ; save p, q, b, h, w ; path p, q, b ; numeric h, w ; p := \cldcontext{moduledata.progress.path("#1","#2")} ; % p := p shifted -llcorner p ; - if bbwidth(p) > 1 : + if bbwidth(p) > 0 : h := 100 ; w := 2 * h ; w := \the\textwidth-3pt ; % correct for pen p := p xstretched w ; @@ -79,7 +80,8 @@ end fi ; fi ; endgroup ; - \stopMPcode\egroup + \stopMPcode + \egroup \scratchdimen\wd\scratchbox \ifdim\scratchdimen>\zeropoint \startlinecorrection diff --git a/tex/context/base/math-ini.lua b/tex/context/base/math-ini.lua index c83bac309..46c366c3e 100644 --- a/tex/context/base/math-ini.lua +++ b/tex/context/base/math-ini.lua @@ -11,7 +11,8 @@ if not modules then modules = { } end modules ['math-ini'] = { -- -- isn't characters.data loaded already ... shortcut it here -local format, utfchar, utfbyte = string.format, utf.char, utf.byte +local format = string.format +local utfchar, utfbyte = utf.char, utf.byte local setmathcode, setdelcode = tex.setmathcode, tex.setdelcode local texattribute = tex.attribute local floor = math.floor diff --git a/tex/context/base/math-ini.mkiv b/tex/context/base/math-ini.mkiv index 3de4c808f..1e78ff492 100644 --- a/tex/context/base/math-ini.mkiv +++ b/tex/context/base/math-ini.mkiv @@ -1082,6 +1082,18 @@ \textface \fi} +\def\mathstylecommand#1#2#3% + {\ifcase\normalmathstyle + \expandafter#1\or + \expandafter#1\or + \expandafter#1\or + \expandafter#1\or + \expandafter#2\or + \expandafter#2\or + \expandafter#3\or + \expandafter#3\else + \expandafter#1\fi} + %D A plain inheritance: \def\mathpalette#1#2% diff --git a/tex/context/base/math-noa.lua b/tex/context/base/math-noa.lua index e9cf79590..3c67d26a2 100644 --- a/tex/context/base/math-noa.lua +++ b/tex/context/base/math-noa.lua @@ -18,8 +18,6 @@ if not modules then modules = { } end 
modules ['math-noa'] = { -- 20D6 -> 2190 -- 20D7 -> 2192 -local utf = unicode.utf8 - local utfchar, utfbyte = utf.char, utf.byte local format, rep = string.format, string.rep local concat = table.concat diff --git a/tex/context/base/meta-fun.lua b/tex/context/base/meta-fun.lua index 7594d0c78..78ee25baf 100644 --- a/tex/context/base/meta-fun.lua +++ b/tex/context/base/meta-fun.lua @@ -8,7 +8,7 @@ if not modules then modules = { } end modules ['meta-fun'] = { -- very experimental, actually a joke ... see metafun manual for usage -local format, loadstring, type = string.format, loadstring, type +local format, load, type = string.format, load, type local metapost = metapost @@ -39,7 +39,7 @@ function metafun.interpolate(f,b,e,s,c) local done = false context("(") for i=b,e,(e-b)/s do - local d = loadstring(format("return function(x) return %s end",f)) + local d = load(format("return function(x) return %s end",f)) if d then d = d() if done then diff --git a/tex/context/base/meta-imp-dum.mkiv b/tex/context/base/meta-imp-dum.mkiv index 83fe12f09..05d40b70b 100644 --- a/tex/context/base/meta-imp-dum.mkiv +++ b/tex/context/base/meta-imp-dum.mkiv @@ -120,6 +120,7 @@ \c!height=\figureheight, \c!frame=\v!off, \c!strut=\v!no, + \c!align={\v!middle,\v!lohi}, \c!background=figure:placeholder:graphic, \c!foregroundcolor=\s!white]% \doifelse{\externalfigureparameter\c!text}\v!yes diff --git a/tex/context/base/meta-ini.lua b/tex/context/base/meta-ini.lua index 8b6fd22a2..928048776 100644 --- a/tex/context/base/meta-ini.lua +++ b/tex/context/base/meta-ini.lua @@ -7,7 +7,9 @@ if not modules then modules = { } end modules ['meta-ini'] = { } local tonumber = tonumber -local format, gmatch, match, gsub = string.format, string.gmatch, string.match, string.gsub +local format = string.format +local lpegmatch, lpegpatterns = lpeg.match, lpeg.patterns +local P, Cs, R, S, C, Cc = lpeg.P, lpeg.Cs, lpeg.R, lpeg.S, lpeg.C, lpeg.Cc local context = context @@ -45,46 +47,94 @@ end local colorhash = 
attributes.list[attributes.private('color')] -local validdimen = lpeg.patterns.validdimen * lpeg.P(-1) - -local lpegmatch = lpeg.match local textype = tex.type local MPcolor = context.MPcolor +-- local validdimen = lpegpatterns.validdimen * P(-1) +-- +-- function commands.prepareMPvariable(v) -- slow but ok +-- if v == "" then +-- MPcolor("black") +-- else +-- local typ, var = match(v,"(.):(.*)") +-- if not typ then +-- -- parse +-- if colorhash[v] then +-- MPcolor(v) +-- elseif tonumber(v) then +-- context(v) +-- elseif lpegmatch(validdimen,v) then +-- return context("\\the\\dimexpr %s",v) +-- else +-- for s in gmatch(v,"\\([a-zA-Z]+)") do -- can have trailing space +-- local t = textype(s) +-- if t == "dimen" then +-- return context("\\the\\dimexpr %s",v) +-- elseif t == "count" then +-- return context("\\the\\numexpr %s",v) +-- end +-- end +-- context("\\number %s",v) -- 0.4 ... +-- end +-- elseif typ == "d" then -- to be documented +-- -- dimension +-- context("\\the\\dimexpr %s",var) +-- elseif typ == "n" then -- to be documented +-- -- number +-- context("\\the\\numexpr %s",var) +-- elseif typ == "s" then -- to be documented +-- -- string +-- context(var) +-- elseif typ == "c" then -- to be documented +-- -- color +-- MPcolor(var) +-- else +-- context(var) +-- end +-- end +-- end + +-- we can actually get the dimen/count values here + +local dimenorname = + lpegpatterns.validdimen / function(s) + context("\\the\\dimexpr %s",s) + end + + (C(lpegpatterns.float) + Cc(1)) * lpegpatterns.space^0 * P("\\") * C(lpegpatterns.letter^1) / function(f,s) + local t = textype(s) + if t == "dimen" then + context("\\the\\dimexpr %s\\%s",f,s) + elseif t == "count" then + context("\\the\\numexpr \\%s * %s\\relax",s,f) --This module is a bit more split up that I'd like but since we also want to test @@ -12601,7 +12620,6 @@ if not modules then modules = { } end modules ['font-def'] = { -- We can overload some of the definers.functions so we don't local them. 
-local concat = table.concat local format, gmatch, match, find, lower, gsub = string.format, string.gmatch, string.match, string.find, string.lower, string.gsub local tostring, next = tostring, next local lpegmatch = lpeg.match diff --git a/tex/generic/context/luatex/luatex-fonts.lua b/tex/generic/context/luatex/luatex-fonts.lua index dd2c902cd..6b502cd24 100644 --- a/tex/generic/context/luatex/luatex-fonts.lua +++ b/tex/generic/context/luatex/luatex-fonts.lua @@ -22,7 +22,7 @@ if not modules then modules = { } end modules ['luatex-fonts'] = { -- -- Todo: all global namespaces in called modules will get local shortcuts. -utf = unicode.utf8 +utf = utf or unicode.utf8 if not generic_context then @@ -132,10 +132,10 @@ else -- mess up ConTeXt code for the sake of general generality. Around -- version 1.0 there will be an official api defined. + loadmodule('l-lpeg.lua') loadmodule('l-function.lua') loadmodule('l-string.lua') loadmodule('l-table.lua') - loadmodule('l-lpeg.lua') loadmodule('l-boolean.lua') loadmodule('l-math.lua') loadmodule('l-file.lua') -- cgit v1.2.3