diff options
author | Context Git Mirror Bot <phg42.2a@gmail.com> | 2016-01-29 16:15:09 +0100 |
---|---|---|
committer | Context Git Mirror Bot <phg42.2a@gmail.com> | 2016-01-29 16:15:09 +0100 |
commit | 452587cdeefbf6e3bf1eee91e4e976f1135b785f (patch) | |
tree | e52f05dfd327c3b31a1b0fb82545dbdec639d2e2 /tex/context/base/mkiv/lxml-tex.lua | |
parent | 975f4f9f2d71d8021900955404f8b144ca6895f5 (diff) | |
download | context-452587cdeefbf6e3bf1eee91e4e976f1135b785f.tar.gz |
2016-01-28 22:37:00
Diffstat (limited to 'tex/context/base/mkiv/lxml-tex.lua')
-rw-r--r-- | tex/context/base/mkiv/lxml-tex.lua | 206 |
1 files changed, 149 insertions, 57 deletions
diff --git a/tex/context/base/mkiv/lxml-tex.lua b/tex/context/base/mkiv/lxml-tex.lua index aad86ed6d..09f1e10f9 100644 --- a/tex/context/base/mkiv/lxml-tex.lua +++ b/tex/context/base/mkiv/lxml-tex.lua @@ -15,7 +15,9 @@ local concat, insert, remove, sortedkeys = table.concat, table.insert, table.rem local format, sub, gsub, find, gmatch, match = string.format, string.sub, string.gsub, string.find, string.gmatch, string.match local type, next, tonumber, tostring, select = type, next, tonumber, tostring, select local lpegmatch = lpeg.match -local P, S, C, Cc = lpeg.P, lpeg.S, lpeg.C, lpeg.Cc +local P, S, C, Cc, Cs = lpeg.P, lpeg.S, lpeg.C, lpeg.Cc, lpeg.Cs +local patterns = lpeg.patterns +local setmetatableindex = table.setmetatableindex local tex, xml = tex, xml local lowerchars, upperchars, lettered = characters.lower, characters.upper, characters.lettered @@ -43,9 +45,8 @@ local xmlcollect = xml.collect local xmltext = xml.text local xmltostring = xml.tostring local xmlapplylpath = xml.applylpath -local xmlunprivatized = xml.unprivatized +local xmlunspecialized = xml.unspecialized local xmlprivatetoken = xml.privatetoken -local xmlprivatecodes = xml.privatecodes local xmlstripelement = xml.stripelement local xmlinclusion = xml.inclusion local xmlinclusions = xml.inclusions @@ -78,21 +79,24 @@ local report_lxml = logs.reporter("lxml","tex") local report_xml = logs.reporter("xml","tex") local forceraw = false -local forceraw = nil + +local p_texescape = patterns.texescape -- tex entities --- --- todo: unprivatize attributes lxml.entities = lxml.entities or { } storage.register("lxml/entities",lxml.entities,"lxml.entities") --- xml.placeholders.unknown_any_entity = nil -- has to be per xml +local xmlentities = xml.entities -- these are more or less standard entities +local texentities = lxml.entities -- these are specific for a tex run +local parsedentity = xml.reparsedentitylpeg + +local useelement = false -- probably no longer needed / used -local xmlentities = xml.entities -local texentities = lxml.entities -local parsedentity = xml.parsedentitylpeg +directives.register("lxml.entities.useelement",function(v) + useelement = v +end) function lxml.registerentity(key,value) texentities[key] = value @@ -103,6 +107,7 @@ end function lxml.resolvedentity(str) if forceraw then + -- should not happen as we then can as well bypass this function if trace_entities then report_xml("passing entity %a as &%s;",str,str) end @@ -151,12 +156,19 @@ function lxml.resolvedentity(str) report_xml("passing faulty entity %a as %a",str,err) end context(err) - else + elseif useelement then local tag = upperchars(str) if trace_entities then report_xml("passing entity %a to \\xmle using tag %a",str,tag) end - context.xmle(str,tag) -- we need to use our own upper + contextsprint(texcatcodes,"\\xmle{") + contextsprint(notcatcodes,e) + contextsprint(texcatcodes,"}") + else + if trace_entities then + report_xml("passing entity %a as %a using %a",str,str,"notcatcodes") + end + contextsprint(notcatcodes,str) end end end @@ -181,11 +193,10 @@ local texfinalizers = finalizers.tex -- serialization with entity handling -local exceptions = false - local ampersand = P("&") local semicolon = P(";") -local entity = ampersand * C((1-semicolon)^1) * semicolon / lxml.resolvedentity -- context.bold + +local entity = (ampersand * C((1-semicolon)^1) * semicolon) / lxml.resolvedentity -- context.bold local _, xmltextcapture_yes = context.newtexthandler { catcodes = notcatcodes, @@ -237,7 +248,6 @@ local xmltextcapture = xmltextcapture_yes local xmlspacecapture = xmlspacecapture_yes local xmllinecapture = xmllinecapture_yes local ctxtextcapture = ctxtextcapture_yes -local prefertexentities = true directives.register("lxml.entities.escaped",function(v) if v then @@ -253,10 +263,6 @@ directives.register("lxml.entities.escaped",function(v) end end) -directives.register("lxml.entities.prefertex",function(v) - prefertex = v -end) - -- cdata local toverbatim = context.newverbosehandler { @@ -468,24 +474,35 @@ function xml.load(filename,settings) return xmltable end --- local function entityconverter(id,str,ent) -- todo ent optional --- return xmlentities[str] or ent[str] or xmlprivatetoken(str) or "" -- roundtrip handler --- end - local function entityconverter(id,str,ent) -- todo: disable tex entities when raw - if prefertexentities then - return xmlentities[str] or (texentities[str] and xmlprivatetoken(str)) or ent[str] or xmlprivatetoken(str) or "" -- roundtrip handler - else - return xmlentities[str] or ent[str] or (texentities[str] and xmlprivatetoken(str)) or xmlprivatetoken(str) or "" -- roundtrip handler + -- tex driven entity + local t = texentities[str] + if t then + local p = xmlprivatetoken(str) +-- only once +-- context.xmlprivate(p,t) + return p + end + -- dtd determined entity + local e = ent and ent[str] + if e then + return e + end + -- predefined entity (mathml and so) + local x = xmlentities[str] + if x then + return x end + -- keep original somehow + return xmlprivatetoken(str) end local function lxmlconvert(id,data,compress,currentresource) local settings = { -- we're now roundtrip anyway - unify_predefined_entities = true, - utfize_entities = true, - resolve_predefined_entities = true, - resolve_entities = function(str,ent) return entityconverter(id,str,ent) end, -- needed for mathml + unify_predefined_entities = false, -- is also default + utfize_entities = true, -- is also default + resolve_predefined_entities = true, -- is also default + resolve_entities = function(str,ent) return entityconverter(id,str,ent) end, currentresource = tostring(currentresource or id), } if compress and compress == variables.yes then @@ -619,10 +636,6 @@ function lxml.loaddata(id,str,compress) return xmltable, id end -function lxml.loadregistered(id) - return loaded[id], id -end - -- e.command: -- -- string : setup @@ -642,6 +655,21 @@ end local default_element_handler = xml.gethandlers("verbose").functions["@el@"] +-- local xmlw = setmetatableindex(function(t,k) +-- local v = setmetatableindex(function(t,kk) +-- local v +-- if kk == false then +-- v = "\\xmlw{" .. k .. "}{" +-- else +-- v = "\\xmlw{" .. k .. "}{" .. kk .. "::" +-- end +-- t[kk] = v +-- return v +-- end) +-- t[k]= v +-- return v +-- end) + local function tex_element(e,handlers) local command = e.command if command == nil then @@ -662,9 +690,11 @@ local function tex_element(e,handlers) end -- faster than context.xmlw contextsprint(ctxcatcodes,"\\xmlw{",command,"}{",rootname,"::",ix,"}") + -- contextsprint(ctxcatcodes,xmlw[command][rootname],ix,"}") else report_lxml("fatal error: no index for %a",command) contextsprint(ctxcatcodes,"\\xmlw{",command,"}{",ix or 0,"}") + -- contextsprint(ctxcatcodes,xmlw[command][false],ix or 0,"}") end elseif tc == "function" then command(e) @@ -734,11 +764,16 @@ local function tex_cdata(e,handlers) end end +-- we could try to merge the conversion and flusher but we don't gain much and it makes tracing +-- harder: xunspecialized = utf.remapper(xml.specialcodes,"dynamic",lxml.resolvedentity) + local function tex_text(e) - e = xmlunprivatized(e) + e = xmlunspecialized(e) lpegmatch(xmltextcapture,e) end +-- + local function ctx_text(e) -- can be just context(e) as we split there lpegmatch(ctxtextcapture,e) end @@ -767,7 +802,7 @@ lxml.xmltexhandler = xmltexhandler -- begin of test local function tex_space(e) - e = xmlunprivatized(e) + e = xmlunspecialized(e) lpegmatch(xmlspacecapture,e) end @@ -785,7 +820,7 @@ local xmltexspacehandler = xml.newhandlers { } local function tex_line(e) - e = xmlunprivatized(e) + e = xmlunspecialized(e) lpegmatch(xmllinecapture,e) end @@ -841,13 +876,13 @@ local function sprint(root) -- check rawroot usage local tr = type(root) if tr == "string" then -- can also be result of lpath -- rawroot = false -- ? - root = xmlunprivatized(root) + root = xmlunspecialized(root) lpegmatch(xmltextcapture,root) elseif tr == "table" then if forceraw then rawroot = root -- contextsprint(ctxcatcodes,xmltostring(root)) -- goes wrong with % etc - root = xmlunprivatized(xmltostring(root)) + root = xmlunspecialized(xmltostring(root)) lpegmatch(xmltextcapture,root) -- goes to toc else xmlserialize(root,xmltexhandler) @@ -868,7 +903,7 @@ local function tprint(root) -- we can move sprint inline end end elseif tr == "string" then - root = xmlunprivatized(root) + root = xmlunspecialized(root) lpegmatch(xmltextcapture,root) end end @@ -879,14 +914,14 @@ local function cprint(root) -- content -- quit elseif type(root) == 'string' then -- rawroot = false - root = xmlunprivatized(root) + root = xmlunspecialized(root) lpegmatch(xmltextcapture,root) else local rootdt = root.dt if forceraw then rawroot = root -- contextsprint(ctxcatcodes,xmltostring(rootdt or root)) - root = xmlunprivatized(xmltostring(root)) + root = xmlunspecialized(xmltostring(root)) lpegmatch(xmltextcapture,root) -- goes to toc else xmlserialize(rootdt or root,xmltexhandler) @@ -1261,6 +1296,9 @@ local function index(collected,n) contextsprint(ctxcatcodes,0) -- why ctxcatcodes end +-- the number of commands is often relative small but there can be many calls +-- to this finalizer + local function command(collected,cmd,otherwise) local n = collected and #collected if n and n > 0 then @@ -1286,6 +1324,45 @@ local function command(collected,cmd,otherwise) end end +-- local wildcards = setmetatableindex(function(t,k) +-- local v = false +-- if find(k,"%*") then +-- v = setmetatableindex(function(t,kk) +-- local v = gsub(k,"%*",kk) +-- t[k] = v +-- -- report_lxml("wildcard %a key %a value %a",kk,k,v) +-- return v +-- end) +-- end +-- t[k] = v +-- return v +-- end) +-- +-- local function command(collected,cmd,otherwise) +-- local n = collected and #collected +-- if n and n > 0 then +-- local wildcard = wildcards[cmd] +-- for c=1,n do -- maybe optimize for n=1 +-- local e = collected[c] +-- local ix = e.ix +-- local name = e.name +-- if name and not ix then +-- addindex(name,false,true) +-- ix = e.ix +-- end +-- if not ix or not name then +-- report_lxml("no valid node index for element %a using command %s",name or "?",cmd) +-- elseif wildcard then +-- contextsprint(ctxcatcodes,"\\xmlw{",wildcard[e.tg],"}{",name,"::",ix,"}") +-- else +-- contextsprint(ctxcatcodes,"\\xmlw{",cmd,"}{",name,"::",ix,"}") +-- end +-- end +-- elseif otherwise then +-- contextsprint(ctxcatcodes,"\\xmlw{",otherwise,"}{#1}") +-- end +-- end + local function attribute(collected,a,default) if collected and #collected > 0 then local at = collected[1].at @@ -1557,7 +1634,22 @@ end function lxml.raw(id,pattern) -- the content, untouched by commands local collected = (pattern and xmlapplylpath(getid(id),pattern)) or getid(id) if collected and #collected > 0 then - contextsprint(notcatcodes,xmltostring(collected[1].dt)) + local s = xmltostring(collected[1].dt) + if s ~= "" then + contextsprint(notcatcodes,s) + end + end +end + +-- templates + +function lxml.rawtex(id,pattern) -- the content, untouched by commands + local collected = (pattern and xmlapplylpath(getid(id),pattern)) or getid(id) + if collected and #collected > 0 then + local s = xmltostring(collected[1].dt) + if s ~= "" then + contextsprint(notcatcodes,lpegmatch(p_texescape,s) or s) + end end end @@ -1681,6 +1773,8 @@ do elseif default and default ~= "" then att = default contextsprint(notcatcodes,default) + else + att = "" end end @@ -1690,14 +1784,16 @@ do local at = e.at if at then att = at[a] - if str and str ~= "" then - str = gsub(str,"^#+","") - if str ~= "" then - contextsprint(notcatcodes,str) + if att and att ~= "" then + att = gsub(att,"^#+","") + if att ~= "" then + contextsprint(notcatcodes,att) + return end end end end + att = "" end function lxml.lastatt() @@ -1706,7 +1802,7 @@ do end -function lxml.name(id) -- or remapped name? -> lxml.info, combine +function lxml.name(id) local e = getid(id) if e then local ns = e.rn or e.ns @@ -1718,7 +1814,7 @@ function lxml.name(id) -- or remapped name? -> lxml.info, combine end end -function lxml.match(id) -- or remapped name? -> lxml.info, combine +function lxml.match(id) local e = getid(id) contextsprint(ctxcatcodes,e and e.mi or 0) end @@ -1733,7 +1829,7 @@ function lxml.tag(id) -- tag vs name -> also in l-xml tag->name end end -function lxml.namespace(id) -- or remapped name? +function lxml.namespace(id) local e = getid(id) if e then local ns = e.rn or e.ns @@ -1803,10 +1899,6 @@ function lxml.elements(id,pattern,reverse) return xmlelements(getid(id),pattern,reverse) end --- obscure ones - -lxml.info = lxml.name - -- testers local found, empty = xml.found, xml.empty @@ -2014,7 +2106,7 @@ xml.pihandlers["injector"] = function(category,rest,e) end end -local pattern = P("context-") * C((1-lpeg.patterns.whitespace)^1) * C(P(1)^1) +local pattern = P("context-") * C((1-patterns.whitespace)^1) * C(P(1)^1) function lxml.applyselectors(id) local root = getid(id) |