summaryrefslogtreecommitdiff
path: root/tex/context/base/mkiv/lxml-css.lua
diff options
context:
space:
mode:
Diffstat (limited to 'tex/context/base/mkiv/lxml-css.lua')
-rw-r--r--tex/context/base/mkiv/lxml-css.lua721
1 files changed, 718 insertions, 3 deletions
diff --git a/tex/context/base/mkiv/lxml-css.lua b/tex/context/base/mkiv/lxml-css.lua
index fa921b24f..b2198f341 100644
--- a/tex/context/base/mkiv/lxml-css.lua
+++ b/tex/context/base/mkiv/lxml-css.lua
@@ -6,10 +6,12 @@ if not modules then modules = { } end modules ['lxml-css'] = {
license = "see context related readme files"
}
-local tonumber, rawset = tonumber, rawset
-local lower, format = string.lower, string.format
-local P, S, C, R, Cb, Cg, Carg, Ct, Cc, Cf = lpeg.P, lpeg.S, lpeg.C, lpeg.R, lpeg.Cb, lpeg.Cg, lpeg.Carg, lpeg.Ct, lpeg.Cc, lpeg.Cf
+local tonumber, rawset, type = tonumber, rawset, type
+local lower, format, find, gmatch = string.lower, string.format, string.find, string.gmatch
+local topattern, is_empty = string.topattern, string.is_empty
+local P, S, C, R, Cb, Cg, Carg, Ct, Cc, Cf, Cs = lpeg.P, lpeg.S, lpeg.C, lpeg.R, lpeg.Cb, lpeg.Cg, lpeg.Carg, lpeg.Ct, lpeg.Cc, lpeg.Cf, lpeg.Cs
local lpegmatch, lpegpatterns = lpeg.match, lpeg.patterns
+local sort = table.sort
xml.css = xml.css or { }
local css = xml.css
@@ -169,3 +171,716 @@ function css.colorspecification(str)
local c = str and attributes.colors.values[tonumber(str)]
return c and format("rgb(%s%%,%s%%,%s%%)",c[3]*100,c[4]*100,c[5]*100)
end
+
+-- The following might be handy. It hooks into the normal parser as <selector>
+-- and should work ok with the rest. It's sometimes even a bit faster but that might
+-- change. It's somewhat optimized but not too aggressively.
+
+-- element-1 > element-2 : element-2 with parent element-1
+
+-- Handler for the "a > b" step. Every entry of 'list' that has a dt is compared
+-- with the tag 'str' ("*" accepts any tag) and appended to 'collected' when the
+-- (possibly negated) test succeeds. The two dummy slots are never used; 'n' is an
+-- internal depth marker: it is absent on the initial call, in which case the
+-- children are visited as well but the recursion stops there. Returns the
+-- updated number of collected entries.
+local function s_element_a(list,collected,c,negate,str,dummy,dummy,n)
+    local wildcard = str == "*"
+    local inverted = negate and true or false
+    for index=1,#list do
+        local node     = list[index]
+        local children = node.dt
+        if children then
+            -- comparing with the negate flag flips the verdict when needed
+            if (wildcard or node.tg == str) ~= inverted then
+                c = c + 1
+                collected[c] = node
+            end
+            if not n or n > 1 then
+                c = s_element_a(children,collected,c,negate,str,dummy,dummy,n and n+1 or 1)
+            end
+        end
+    end
+    return c
+end
+
+-- element-1 + element-2 : element-2 immediately preceded by element-1
+
+-- Handler for the "a + b" step. For every entry of 'list' the next slot in the
+-- parent's dt is inspected; when that slot has no dt of its own the slot after it
+-- is tried instead. A candidate whose tag passes the (possibly negated) test
+-- against 'str' ("*" accepts any tag) is appended to 'collected'. Returns the
+-- updated count.
+local function s_element_b(list,collected,c,negate,str)
+    local wildcard = str == "*"
+    local inverted = negate and true or false
+    for index=1,#list do
+        local node     = list[index]
+        local parent   = node.__p__
+        local siblings = parent and parent.dt
+        if siblings then
+            local position = node.ni
+            local follower = siblings[position+1]
+            local content  = follower and follower.dt
+            if not content then
+                -- the direct neighbour carries no dt, so look one slot further
+                follower = siblings[position+2]
+                content  = follower and follower.dt
+            end
+            if content and (wildcard or follower.tg == str) ~= inverted then
+                c = c + 1
+                collected[c] = follower
+            end
+        end
+    end
+    return c
+end
+
+-- element-1 ~ element-2 : element-2 that follows element-1 as a (not necessarily adjacent) sibling
+
+-- Handler for the "a ~ b" step. For every entry of 'list' all later slots in the
+-- parent's dt are visited; those that have a dt and whose tag passes the
+-- (possibly negated) test against 'str' ("*" accepts any tag) are appended to
+-- 'collected'. Returns the updated count.
+local function s_element_c(list,collected,c,negate,str)
+    local wildcard = str == "*"
+    local inverted = negate and true or false
+    for index=1,#list do
+        local node     = list[index]
+        local parent   = node.__p__
+        local siblings = parent and parent.dt
+        if siblings then
+            for position=node.ni+1,#siblings do
+                local follower = siblings[position]
+                if follower.dt and (wildcard or follower.tg == str) ~= inverted then
+                    c = c + 1
+                    collected[c] = follower
+                end
+            end
+        end
+    end
+    return c
+end
+
+-- element
+-- element-1 element-2 : element-2 inside element-1
+
+-- Handler for a plain tag step ("a" or "a b"). Walks 'list' and everything below
+-- it; entries that have a dt, are not flagged as special and whose tag passes the
+-- (possibly negated) test against 'str' are appended to 'collected'. With "*"
+-- every non special entry qualifies, and a negated "*" collects nothing at all.
+-- Returns the updated count.
+local function s_element_d(list,collected,c,negate,str)
+    local wildcard = str == "*"
+    if wildcard and negate then
+        -- nothing can be outside "everything" so there is no need to walk the tree
+        return c
+    end
+    local inverted = negate and true or false
+    for index=1,#list do
+        local node     = list[index]
+        local children = node.dt
+        if children then
+            if not node.special and (wildcard or node.tg == str) ~= inverted then
+                c = c + 1
+                collected[c] = node
+            end
+            c = s_element_d(children,collected,c,negate,str)
+        end
+    end
+    return c
+end
+
+-- [attribute]
+-- [attribute=value] equals
+-- [attribute~=value] contains word
+-- [attribute^="value"] starts with
+-- [attribute$="value"] ends with
+-- [attribute*="value"] contains
+
+-- .class (no need to optimize)
+-- #id (no need to optimize)
+
+-- Handler for attribute steps (also used for .class and #id). Walks 'list' and
+-- everything below it. For entries that have both a dt and an at table the
+-- attribute 'str' is tested according to 'what':
+--
+--   nil : the attribute only has to be present
+--   1   : its value equals 'value'
+--   2   : 'value' is a pattern that has to be found in it
+--   3   : 'value' is one of the space separated words in it
+--
+-- Entries that pass (or fail, when 'negate' is set) are appended to 'collected'.
+-- Returns the updated count.
+local function s_attribute(list,collected,c,negate,str,what,value)
+    for index=1,#list do
+        local node     = list[index]
+        local children = node.dt
+        if children then
+            local attr = node.at
+            if attr then
+                local found   = attr[str]
+                local matched = false
+                if not found then
+                    -- attribute absent: never a match
+                elseif not what then
+                    matched = true
+                elseif what == 1 then
+                    matched = found == value
+                elseif what == 2 then
+                    -- todo: lpeg
+                    if find(found,value) then
+                        matched = true
+                    end
+                elseif what == 3 then
+                    -- todo: lpeg
+                    if find(found," ") then
+                        for word in gmatch(found,"[^ ]+") do
+                            if word == value then
+                                matched = true
+                                break
+                            end
+                        end
+                    else
+                        matched = found == value
+                    end
+                end
+                local keep
+                if matched then
+                    keep = not negate
+                else
+                    keep = negate
+                end
+                if keep then
+                    c = c + 1
+                    collected[c] = node
+                end
+            end
+            c = s_attribute(children,collected,c,negate,str,what,value)
+        end
+    end
+    return c
+end
+
+-- :nth-child(n)
+-- :nth-last-child(n)
+-- :first-child
+-- :last-child
+
+-- Helper for the formula variants of :nth-last-child and :nth-last-of-type.
+--
+-- Only the table entries of 'dt' count as elements. Counting those from 1, the
+-- positions a, a+b, a+2b, ... are marked. Then the elements are visited from
+-- last to first and either the marked ones or, when 'negate' is set, the
+-- unmarked ones are appended to 'collected'.
+--
+--   collected : result list that is appended to
+--   c         : number of entries already in 'collected'
+--   negate    : when set, collect the complement
+--   dt        : list with (possibly mixed) table and non table entries
+--   a, b      : start position and step
+--
+-- Returns the updated count. When there are no elements the count is returned
+-- untouched so that results of earlier calls are not lost. A missing or non
+-- positive step marks just position 'a' because repeating it cannot advance.
+local function filter_down(collected,c,negate,dt,a,b)
+    local t = { }
+    local n = 0
+    for i=1,#dt do
+        if type(dt[i]) == "table" then
+            n = n + 1
+            t[n] = i
+        end
+    end
+    if n == 0 then
+        return c
+    end
+    local m = a
+    while m <= n do
+        if m > 0 then
+            t[m] = -t[m] -- sign signals match
+        end
+        if not b or b <= 0 then
+            break -- such a step would keep us in this loop forever
+        end
+        m = m + b
+    end
+    if negate then
+        for i=n,1,-1 do
+            local ti = t[i]
+            if ti > 0 then
+                c = c + 1
+                collected[c] = dt[ti]
+            end
+        end
+    else
+        for i=n,1,-1 do
+            local ti = t[i]
+            if ti < 0 then
+                c = c + 1
+                collected[c] = dt[-ti]
+            end
+        end
+    end
+    return c
+end
+
+-- Helper for the formula variants of :nth-child and :nth-of-type.
+--
+-- Only the table entries of 'dt' count as elements (n in total). Starting at
+-- position n-a and going down in steps of b, every visited position below n is
+-- marked. Then the elements are visited from first to last and either the marked
+-- ones or, when 'negate' is set, the unmarked ones are appended to 'collected'.
+--
+--   collected : result list that is appended to
+--   c         : number of entries already in 'collected'
+--   negate    : when set, collect the complement
+--   dt        : list with (possibly mixed) table and non table entries
+--   a, b      : offset from the end and step
+--
+-- Returns the updated count. When there are no elements the count is returned
+-- untouched so that results of earlier calls are not lost. A missing or non
+-- positive step marks at most one position because repeating it cannot advance.
+--
+-- NOTE(review): positions are counted back from the last element here although
+-- the callers are the :nth-child / :nth-of-type handlers - confirm that this
+-- direction is intended.
+local function filter_up(collected,c,negate,dt,a,b)
+    local t = { }
+    local n = 0
+    for i=1,#dt do
+        if type(dt[i]) == "table" then
+            n = n + 1
+            t[n] = i
+        end
+    end
+    if n == 0 then
+        return c
+    end
+    local m = n - a
+    while m >= 1 do
+        if m < n then
+            t[m] = -t[m] -- sign signals match
+        end
+        if not b or b <= 0 then
+            break -- such a step would keep us in this loop forever
+        end
+        m = m - b
+    end
+    if negate then
+        for i=1,n do
+            local ti = t[i]
+            if ti > 0 then
+                c = c + 1
+                collected[c] = dt[ti]
+            end
+        end
+    else
+        for i=1,n do
+            local ti = t[i]
+            if ti < 0 then
+                c = c + 1
+                collected[c] = dt[-ti]
+            end
+        end
+    end
+    return c
+end
+
+-- Picks the a-th element of 'dt', where only table entries count and the slots
+-- are visited from 'start' to 'stop' in steps of 'step'. Without 'negate' that
+-- single element is appended to 'collected' and the walk ends; with 'negate' all
+-- other elements are appended. Returns the updated count.
+local function just(collected,c,negate,dt,a,start,stop,step)
+    local seen = 0
+    for position=start,stop,step do
+        local candidate = dt[position]
+        if type(candidate) == "table" then
+            seen = seen + 1
+            local wanted = seen == a
+            if negate then
+                if not wanted then
+                    c = c + 1
+                    collected[c] = candidate
+                end
+            elseif wanted then
+                c = c + 1
+                collected[c] = candidate
+                break
+            end
+        end
+    end
+    return c
+end
+
+-- Handler for :nth-child and :first-child. For every entry of 'list' that has a
+-- dt the selection is delegated: to filter_up when a formula was given (n is the
+-- string "n") and otherwise to just, which picks child number 'a' counted from
+-- the start. Returns the updated count.
+local function s_nth_child(list,collected,c,negate,a,n,b)
+    local formula = n == "n"
+    for index=1,#list do
+        local children = list[index].dt
+        if children then
+            if formula then
+                c = filter_up(collected,c,negate,children,a,b)
+            else
+                c = just(collected,c,negate,children,a,1,#children,1)
+            end
+        end
+    end
+    return c
+end
+
+-- Handler for :nth-last-child and :last-child. For every entry of 'list' that
+-- has a dt the selection is delegated: to filter_down when a formula was given
+-- (n is the string "n") and otherwise to just, which picks child number 'a'
+-- counted from the end. Returns the updated count.
+local function s_nth_last_child(list,collected,c,negate,a,n,b)
+    local formula = n == "n"
+    for index=1,#list do
+        local children = list[index].dt
+        if children then
+            if formula then
+                c = filter_down(collected,c,negate,children,a,b)
+            else
+                c = just(collected,c,negate,children,a,#children,1,-1)
+            end
+        end
+    end
+    return c
+end
+
+-- :nth-of-type(n)
+-- :nth-last-of-type(n)
+-- :first-of-type
+-- :last-of-type
+
+-- Handler for :nth-of-type and :first-of-type. Here the selection is applied to
+-- 'list' itself: just picks entry number 'a' counted from the start unless a
+-- formula was given (n is the string "n"), in which case filter_up decides.
+local function s_nth_of_type(list,collected,c,negate,a,n,b)
+    if n ~= "n" then
+        return just(collected,c,negate,list,a,1,#list,1)
+    end
+    return filter_up(collected,c,negate,list,a,b)
+end
+
+-- Handler for :nth-last-of-type and :last-of-type. Here the selection is applied
+-- to 'list' itself: just picks entry number 'a' counted from the end unless a
+-- formula was given (n is the string "n"), in which case filter_down decides.
+local function s_nth_last_of_type(list,collected,c,negate,a,n,b)
+    if n ~= "n" then
+        return just(collected,c,negate,list,a,#list,1,-1)
+    end
+    return filter_down(collected,c,negate,list,a,b)
+end
+
+-- :only-of-type
+
+-- Handler for :only-of-type. Without 'negate' the single entry of 'list' is
+-- appended to 'collected', but only when 'list' has exactly one entry. With
+-- 'negate' every entry of 'list' is appended. Returns the updated count.
+local function s_only_of_type(list,collected,c,negate)
+    local count = #list
+    if not negate then
+        if count == 1 then
+            c = c + 1
+            collected[c] = list[1]
+        end
+        return c
+    end
+    for index=1,count do
+        c = c + 1
+        collected[c] = list[index]
+    end
+    return c
+end
+
+-- :only-child
+
+-- Handler for :only-child. Looks at the dt of every entry of 'list'. Without
+-- 'negate' a dt that holds exactly one slot contributes that slot when it is a
+-- table. With 'negate' every table found in any dt is appended. Returns the
+-- updated count.
+local function s_only_child(list,collected,c,negate)
+    for index=1,#list do
+        local children = list[index].dt
+        if children then
+            if negate then
+                for position=1,#children do
+                    local child = children[position]
+                    if type(child) == "table" then
+                        c = c + 1
+                        collected[c] = child
+                    end
+                end
+            elseif #children == 1 then
+                local child = children[1]
+                if type(child) == "table" then
+                    c = c + 1
+                    collected[c] = child
+                end
+            end
+        end
+    end
+    return c
+end
+
+-- :empty
+
+-- Handler for :empty. An entry of 'list' that has a dt counts as empty when that
+-- dt has no slots, or has one slot holding a string for which is_empty reports
+-- true. Empty entries (or the non empty ones, when 'negate' is set) are appended
+-- to 'collected'. Returns the updated count.
+local function s_empty(list,collected,c,negate)
+    for index=1,#list do
+        local node     = list[index]
+        local children = node.dt
+        if children then
+            local count = #children
+            local blank = count == 0
+            if count == 1 then
+                local only = children[1]
+                blank = type(only) == "string" and is_empty(only)
+            end
+            local keep
+            if negate then
+                keep = not blank
+            else
+                keep = blank
+            end
+            if keep then
+                c = c + 1
+                collected[c] = node
+            end
+        end
+    end
+    return c
+end
+
+-- :root
+
+-- Handler for :root. Takes the first table entry of 'list' for which xml.root
+-- returns something and appends that result to 'collected'; when the result is
+-- the special "@rt@" wrapper its dt[ri] entry is used instead. At most one entry
+-- is added and 'negate' plays no role here. Returns the updated count.
+local function s_root(list,collected,c,negate)
+    for index=1,#list do
+        local candidate = list[index]
+        local root = type(candidate) == "table" and xml.root(candidate)
+        if root then
+            if root.special and root.tg == "@rt@" then
+                root = root.dt[root.ri]
+            end
+            c = c + 1
+            collected[c] = root
+            break
+        end
+    end
+    return c
+end
+
+local P, R, S, C, Cs, Ct, Cc, Carg, lpegmatch = lpeg.P, lpeg.R, lpeg.S, lpeg.C, lpeg.Cs, lpeg.Ct, lpeg.Cc, lpeg.Carg, lpeg.match -- same lpeg helpers as localized at the top of the file
+
+local whitespace = lpegpatterns.whitespace
+local p_number = lpegpatterns.integer / tonumber -- an integer, delivered as number
+local p_space = whitespace^0 -- optional whitespace
+
+local p_key = C((R("az","AZ","09") + S("_-"))^1) -- attribute name: letters, digits, _ and -
+local p_left = S("#.[],:()") -- characters that cannot start a tag
+local p_right = S("#.[],:() ") -- characters that end a tag
+local p_tag = C((1-p_left) * (1-p_right)^0) -- tag, class or id name
+local p_value = C((1-P("]"))^0) -- not referenced by the patterns below
+local p_unquoted = (P('"')/"") * C((1-P('"'))^0) * (P('"')/"") -- "..." with the quotes mapped onto nothing
+ + (1-P("]"))^1 -- or a bare value that runs up to the closing bracket
+local p_element = Ct( ( -- captures { handler, tag }
+ P(">") * p_space * Cc(s_element_a) + -- a > b
+ P("+") * p_space * Cc(s_element_b) + -- a + b
+ P("~") * p_space * Cc(s_element_c) + -- a ~ b
+ Cc(s_element_d) -- a or a b
+ ) * p_tag )
+local p_attribute = P("[") * Ct(Cc(s_attribute) * p_key * ( -- captures { s_attribute, key [, what, value] }
+ P("=" ) * Cc(1) * Cs( p_unquoted) -- what 1: equals
+ + P("^=") * Cc(2) * Cs(Cc("^") * (p_unquoted / topattern)) -- what 2: value turned into a pattern anchored at the start
+ + P("$=") * Cc(2) * Cs( p_unquoted / topattern * Cc("$")) -- what 2: value turned into a pattern anchored at the end
+ + P("*=") * Cc(2) * Cs( p_unquoted / topattern) -- what 2: value turned into an unanchored pattern
+ + P("~=") * Cc(3) * Cs( p_unquoted) -- what 3: one of the space separated words
+ )^0 * P("]"))
+
+local p_separator = p_space * P(",") * p_space -- separates alternatives
+
+local p_formula = p_space * P("(") -- the argument of the nth selectors
+ * p_space
+ * (
+ p_number * p_space * (C("n") * p_space * (p_number + Cc(0)))^-1 -- captures a and optionally "n" plus b (0 when left out)
+ + P("even") * Cc(0) * Cc("n") * Cc(2) -- same captures as a formula: 0, "n", 2
+ + P("odd") * Cc(-1) * Cc("n") * Cc(2) -- same captures as a formula: -1, "n", 2
+ )
+ * p_space
+ * P(")")
+
+local p_step = P(".") * Ct(Cc(s_attribute) * Cc("class") * Cc(3) * p_tag) -- .name : word in the class attribute
+ + P("#") * Ct(Cc(s_attribute) * Cc("id") * Cc(1) * p_tag) -- #name : id attribute equals name
+ + p_attribute
+ + p_element
+ + P(":nth-child") * Ct(Cc(s_nth_child) * p_formula)
+ + P(":nth-last-child") * Ct(Cc(s_nth_last_child) * p_formula)
+ + P(":first-child") * Ct(Cc(s_nth_child) * Cc(1)) -- child number 1 from the start
+ + P(":last-child") * Ct(Cc(s_nth_last_child) * Cc(1)) -- child number 1 from the end
+ + P(":only-child") * Ct(Cc(s_only_child) )
+ + P(":nth-of-type") * Ct(Cc(s_nth_of_type) * p_formula)
+ + P(":nth-last-of-type") * Ct(Cc(s_nth_last_of_type) * p_formula)
+ + P(":first-of-type") * Ct(Cc(s_nth_of_type) * Cc(1))
+ + P(":last-of-type") * Ct(Cc(s_nth_last_of_type) * Cc(1))
+ + P(":only-of-type") * Ct(Cc(s_only_of_type) )
+ + P(":empty") * Ct(Cc(s_empty) )
+ + P(":root") * Ct(Cc(s_root) )
+
+local p_not = P(":not") * Cc(true) * p_space * P("(") * p_space * p_step * p_space * P(")") -- negate flag true, then the step table
+local p_yes = Cc(false) * p_space * p_step -- negate flag false, then the step table
+
+local p_stepper = Ct((p_space * (p_not+p_yes))^1) -- one alternative: flat list flag, step, flag, step, ...
+local p_steps = Ct((p_stepper * p_separator^0)^1) * p_space * (P(-1) + function() print("error") end) -- list of alternatives; leftover input prints "error" and the function returns nothing
+
+-- Parsed selectors, indexed by selector string. A string that is not in the
+-- table yet is run through p_steps; the result (false when there is no match)
+-- is stored under that string and returned.
+local cache = table.setmetatableindex(function(parsed,str)
+    local steps = lpegmatch(p_steps,str)
+    if not steps then
+        steps = false
+    end
+    parsed[str] = steps
+    return steps
+end)
+
+local function selector(root,s) -- root: list of start nodes, s: selector string; returns a list of matching nodes
+ -- local steps = lpegmatch(p_steps,s)
+ local steps = cache[s] -- parsed alternatives, or false when the string could not be parsed
+ if steps then
+ local done = { } -- nodes that were collected already; later on reused for their order number
+ local collected = { }
+ local nofcollected = 0
+ local nofsteps = #steps
+ for i=1,nofsteps do -- each alternative is applied to the original root
+ local step = steps[i]
+ local n = #step
+ if n > 0 then
+ local r = root
+ local m = 0
+ local c = { }
+ for i=1,n,2 do -- the entries come in pairs: negate flag, step table
+ local s = step[i+1] -- function + data
+ m = s[1](r,c,0,step[i],s[2],s[3],s[4]) -- handler(list,collected,count,negate,data ...) returns the number of hits
+ if m == 0 then -- nothing matched so this alternative gives no result
+ break
+ else
+ r = c -- the hits are the input for the next step
+ c = { }
+ end
+ end
+ if m > 0 then
+ if nofsteps > 1 then -- several alternatives: merge them and skip duplicates
+ for i=1,m do
+ local ri = r[i]
+ if done[ri] then
+ -- print("duplicate",i)
+ -- elseif ri.special then
+ -- done[ri] = true
+ else
+ nofcollected = nofcollected + 1
+ collected[nofcollected] = ri
+ done[ri] = true
+ end
+ end
+ else
+ return r -- a single alternative is returned as it is
+ end
+ end
+ end
+ end
+ if nofcollected > 1 then -- merged results get sorted by position in the tree
+ -- local n = 0
+ -- local function traverse(e)
+ -- if done[e] then
+ -- n = n + 1
+ -- done[e] = n
+ -- end
+ -- local dt = e.dt
+ -- if dt then
+ -- for i=1,#dt do
+ -- local e = dt[i]
+ -- if type(e) == "table" then
+ -- traverse(e)
+ -- end
+ -- end
+ -- end
+ -- end
+ -- traverse(root[1])
+ --
+ local n = 0
+ local function traverse(dt) -- replaces the true in done by a running number, in the order of the walk
+ for i=1,#dt do
+ local e = dt[i]
+ if done[e] then
+ n = n + 1
+ done[e] = n
+ if n == nofcollected then -- every collected node has a number now
+ return
+ end
+ end
+ local d = e.dt
+ if d then
+ traverse(d)
+ if n == nofcollected then
+ return
+ end
+ end
+ end
+ end
+ local r = root[1] -- NOTE(review): only the first start node is walked; assumes all hits live below it - confirm
+ if done[r] then
+ n = n + 1
+ done[r] = n
+ end
+ traverse(r.dt)
+ --
+ sort(collected,function(a,b) return done[a] < done[b] end) -- order by the numbers assigned above
+ end
+ return collected
+ else
+ return { } -- unparsable selector: empty result
+ end
+end
+
+xml.applyselector= selector -- public entry point
+
+-- local t = [[
+-- <?xml version="1.0" ?>
+--
+-- <a>
+-- <b class="one"> </b>
+-- <b class="two"> </b>
+-- <b class="one"> </b>
+-- <b class="three"> </b>
+-- <b id="first"> </b>
+-- <c> </c>
+-- <d> d e </d>
+-- <e> d e </e>
+-- <e> d e e </e>
+-- <d> d f </d>
+-- <f foo="bar"> </f>
+-- <f bar="foo"> </f>
+-- <f bar="foo1"> </f>
+-- <f bar="foo2"> </f>
+-- <f bar="foo3"> </f>
+-- <f bar="foo+4"> </f>
+-- <g> </g>
+-- <?crap ?>
+-- <!-- crap -->
+-- <g> <gg> <d> </d> </gg> </g>
+-- <g> <gg> <f> </f> </gg> </g>
+-- <g> <gg> <f class="one"> g gg f </f> </gg> </g>
+-- <g> </g>
+-- <g> <gg> <f class="two"> g gg f </f> </gg> </g>
+-- <g> <gg> <f class="three"> g gg f </f> </gg> </g>
+-- <g> <f class="one"> g f </f> </g>
+-- <g> <f class="three"> g f </f> </g>
+-- <h whatever="four five six"> </h>
+-- </a>
+-- ]]
+--
+-- local s = [[ .one ]]
+-- local s = [[ .one, .two ]]
+-- local s = [[ .one, .two, #first ]]
+-- local s = [[ .one, .two, #first, c, e, [foo], [bar=foo] ]]
+-- local s = [[ .one, .two, #first, c, e, [foo], [bar=foo], [bar~=foo] [bar^="foo"] ]]
+-- local s = [[ [bar^="foo"] ]]
+-- local s = [[ g f .one, g f .three ]]
+-- local s = [[ g > f .one, g > f .three ]]
+-- local s = [[ * ]]
+-- local s = [[ d + e ]]
+-- local s = [[ d ~ e ]]
+-- local s = [[ d ~ e, g f .one, g f .three ]]
+-- local s = [[ :not(d) ]]
+-- local s = [[ [whatever~="five"] ]]
+-- local s = [[ :not([whatever~="five"]) ]]
+-- local s = [[ e ]]
+-- local s = [[ :not ( e ) ]]
+-- local s = [[ a:nth-child(3) ]]
+-- local s = [[ a:nth-child(3n+1) ]]
+-- local s = [[ a:nth-child(2n+8) ]]
+-- local s = [[ g:nth-of-type(3) ]]
+-- local s = [[ a:first-child ]]
+-- local s = [[ a:last-child ]]
+-- local s = [[ e:first-of-type ]]
+-- local s = [[gg d:only-of-type ]]
+-- local s = [[ a:nth-child(even) ]]
+-- local s = [[ a:nth-child(odd) ]]
+-- local s = [[ g:empty ]]
+-- local s = [[ g:root ]]
+
+-- Applies the selector string 'str' to the node 'x' and returns the list of
+-- matching nodes. The node is wrapped in a table because selector expects a
+-- list of start nodes. This calls the local selector function (also exported as
+-- xml.applyselector); a bare 'applyselector' is not defined in this file.
+function css.applyselector(x,str)
+    -- the wrapping needs checking so this is a placeholder
+    return selector({ x },str)
+end
+
+-- local c = css.applyselector(xml.convert(t),s) for i=1,#c do print(xml.tostring(c[i])) end
+