From 19af23ac5cb927d986a64ac1dc52ed2d7bad2450 Mon Sep 17 00:00:00 2001 From: Hans Hagen Date: Fri, 28 Sep 2007 11:58:00 +0200 Subject: stable 2007.09.28 11:58 --- scripts/context/lua/luatools.lua | 79 +- scripts/context/lua/mtx-babel.lua | 368 ++++++ scripts/context/lua/mtx-chars.lua | 81 ++ scripts/context/lua/mtxrun.lua | 2370 +++++++++++++++++----------------- scripts/context/ruby/base/pdf.rb | 2 +- scripts/context/ruby/base/tex.rb | 55 +- scripts/context/ruby/base/texutil.rb | 84 +- scripts/context/ruby/www/dir.rb | 6 +- 8 files changed, 1781 insertions(+), 1264 deletions(-) create mode 100644 scripts/context/lua/mtx-babel.lua (limited to 'scripts') diff --git a/scripts/context/lua/luatools.lua b/scripts/context/lua/luatools.lua index 84899275c..d53180cfa 100644 --- a/scripts/context/lua/luatools.lua +++ b/scripts/context/lua/luatools.lua @@ -5056,6 +5056,7 @@ messages.help = [[ --show-path show path expansion of ... --var-value report value of variable --find-file report file location +--find-path report path of file --make or --ini make luatex format --run or --fmt= run luatex format --luafile=str lua inifile (default is .lua) @@ -5203,22 +5204,26 @@ input.report(banner,"\n") local ok = true -if environment.arguments["selfmerge"] then - utils.merger.selfmerge(own.name,own.libs,own.list) -elseif environment.arguments["selfclean"] then - utils.merger.selfclean(own.name) -elseif environment.arguments["selfupdate"] then - input.my_prepare_b(instance) - input.verbose = true - input.update_script(instance,own.name,"luatools") -elseif environment.arguments["generate"] then - instance.renewcache = true - input.verbose = true +if environment.arguments["find-file"] then input.my_prepare_b(instance) -elseif environment.arguments["make"] or environment.arguments["ini"] or environment.arguments["compile"] then + instance.format = environment.arguments["format"] or instance.format + if instance.pattern then + instance.allresults = true + input.for_files(instance, 
input.find_files, { instance.pattern }, instance.my_format) + else + input.for_files(instance, input.find_files, environment.files, instance.my_format) + end +elseif environment.arguments["find-path"] then input.my_prepare_b(instance) - input.verbose = true - input.my_make_format(instance,environment.files[1] or "") + local path = input.find_file(instance, environment.files[1], instance.my_format) + if input.verbose then + input.report(file.dirname(path)) + else + print(file.dirname(path)) + end +--~ elseif environment.arguments["first-writable-path"] then +--~ input.my_prepare_b(instance) +--~ input.report(input.first_writable_path(instance,environment.files[1] or ".")) elseif environment.arguments["run"] then input.my_prepare_a(instance) -- ! no need for loading databases input.verbose = true @@ -5227,15 +5232,6 @@ elseif environment.arguments["fmt"] then input.my_prepare_a(instance) -- ! no need for loading databases input.verbose = true input.my_run_format(instance,environment.arguments["fmt"], environment.files[1] or "") -elseif environment.arguments["variables"] or environment.arguments["show-variables"] then - input.my_prepare_a(instance) - input.list_variables(instance) -elseif environment.arguments["expansions"] or environment.arguments["show-expansions"] then - input.my_prepare_a(instance) - input.list_expansions(instance) -elseif environment.arguments["configurations"] or environment.arguments["show-configurations"] then - input.my_prepare_a(instance) - input.list_configurations(instance) elseif environment.arguments["expand-braces"] then input.my_prepare_a(instance) input.for_files(instance, input.expand_braces, environment.files) @@ -5251,18 +5247,6 @@ elseif environment.arguments["show-path"] or environment.arguments["path-value"] elseif environment.arguments["var-value"] or environment.arguments["show-value"] then input.my_prepare_a(instance) input.for_files(instance, input.var_value, environment.files) -elseif environment.arguments["find-file"] then 
- input.my_prepare_b(instance) - instance.format = environment.arguments["format"] or instance.format - if instance.pattern then - instance.allresults = true - input.for_files(instance, input.find_files, { instance.pattern }, instance.my_format) - else - input.for_files(instance, input.find_files, environment.files, instance.my_format) - end ---~ elseif environment.arguments["first-writable-path"] then ---~ input.my_prepare_b(instance) ---~ input.report(input.first_writable_path(instance,environment.files[1] or ".")) elseif environment.arguments["format-path"] then input.my_prepare_b(instance) input.report(caches.setpath(instance,"format")) @@ -5271,6 +5255,31 @@ elseif instance.pattern then -- brrr instance.format = environment.arguments["format"] or instance.format instance.allresults = true input.for_files(instance, input.find_files, { instance.pattern }, instance.my_format) +elseif environment.arguments["generate"] then + instance.renewcache = true + input.verbose = true + input.my_prepare_b(instance) +elseif environment.arguments["make"] or environment.arguments["ini"] or environment.arguments["compile"] then + input.my_prepare_b(instance) + input.verbose = true + input.my_make_format(instance,environment.files[1] or "") +elseif environment.arguments["selfmerge"] then + utils.merger.selfmerge(own.name,own.libs,own.list) +elseif environment.arguments["selfclean"] then + utils.merger.selfclean(own.name) +elseif environment.arguments["selfupdate"] then + input.my_prepare_b(instance) + input.verbose = true + input.update_script(instance,own.name,"luatools") +elseif environment.arguments["variables"] or environment.arguments["show-variables"] then + input.my_prepare_a(instance) + input.list_variables(instance) +elseif environment.arguments["expansions"] or environment.arguments["show-expansions"] then + input.my_prepare_a(instance) + input.list_expansions(instance) +elseif environment.arguments["configurations"] or environment.arguments["show-configurations"] then 
+ input.my_prepare_a(instance) + input.list_configurations(instance) elseif environment.arguments["help"] or (environment.files[1]=='help') or (#environment.files==0) then if not input.verbose then input.verbose = true diff --git a/scripts/context/lua/mtx-babel.lua b/scripts/context/lua/mtx-babel.lua new file mode 100644 index 000000000..5ef9ae934 --- /dev/null +++ b/scripts/context/lua/mtx-babel.lua @@ -0,0 +1,368 @@ +-- data tables by Thomas A. Schmitz + +dofile(input.find_file(instance,"luat-log.lua")) + +texmf.instance = instance -- we need to get rid of this / maybe current instance in global table + +scripts = scripts or { } +scripts.babel = scripts.babel or { } + +do + + local replace_01 = { -- <' * | + a = "ᾅ", + h = "ᾕ", + w = "ᾥ", + } + + local replace_02 = { -- >' * | + a = "ᾄ", + h = "ᾔ", + w = "ᾤ", + } + + local replace_03 = { -- <` * | + a = "ᾃ", + h = "ᾓ", + w = "ᾣ", + } + + local replace_04 = { -- >` * | + a = "ᾂ", + h = "ᾒ", + w = "ᾢ", + } + + local replace_05 = { -- <~ * | + a = "ᾇ", + h = "ᾗ", + w = "ᾧ", + } + + local replace_06 = { -- >~ * | + a = "ᾆ", + h = "ᾖ", + w = "ᾦ" + } + + local replace_07 = { -- "' * + i = "ΐ", + u = "ΰ", + } + + local replace_08 = { -- "` * + i = "ῒ", + u = "ῢ", + } + + local replace_09 = { -- "~ * + i = "ῗ", + u = "ῧ", + } + + local replace_10 = { -- <' * + a = "ἅ", + e = "ἕ", + h = "ἥ", + i = "ἵ", + o = "ὅ", + u = "ὕ", + w = "ὥ", + A = "Ἅ", + E = "Ἕ", + H = "Ἥ", + I = "Ἵ", + O = "Ὅ", + U = "Ὕ", + W = "Ὥ", + } + + local replace_11 = { -- >' * + a = "ἄ", + e = "ἔ", + h = "ἤ", + i = "ἴ", + o = "ὄ", + u = "ὔ", + w = "ὤ", + A = "Ἄ", + E = "Ἔ", + H = "Ἤ", + I = "Ἴ", + O = "Ὄ", + U = "῎Υ", + W = "Ὤ", + } + + local replace_12 = { -- <` * + a = "ἃ", + e = "ἓ", + h = "ἣ", + i = "ἳ", + o = "ὃ", + u = "ὓ", + w = "ὣ", + A = "Ἃ", + E = "Ἒ", + H = "Ἣ", + I = "Ἳ", + O = "Ὃ", + U = "Ὓ", + W = "Ὣ", + } + + local replace_13 = { -- >` * + a = "ἂ", + e = "ἒ", + h = "ἢ", + i = "ἲ", + o = "ὂ", + u = "ὒ", + w = "ὢ", + A = "Ἂ", + E = "Ἒ", + 
H = "Ἢ", + I = "Ἲ", + O = "Ὂ", + U = "῍Υ", + W = "Ὢ", + } + + local replace_14 = { -- <~ * + a = "ἇ", + h = "ἧ", + i = "ἷ", + u = "ὗ", + w = "ὧ", + A = "Ἇ", + H = "Ἧ", + I = "Ἷ", + U = "Ὗ", + W = "Ὧ", + } + + local replace_15 = { -- >~ * + a = "ἆ", + h = "ἦ", + i = "ἶ", + u = "ὖ", + w = "ὦ", + A = "Ἆ", + H = "Ἦ", + I = "Ἶ", + U = "῏Υ", + W = "Ὦ", + } + + local replace_16 = { -- ' * | + a = "ᾴ", + h = "ῄ", + w = "ῴ", + } + + local replace_17 = { -- ` * | + a = "ᾲ", + h = "ῂ", + w = "ῲ", + } + + local replace_18 = { -- ~ * | + a = "ᾷ", + h = "ῇ", + w = "ῷ" + } + + local replace_19 = { -- ' * + a = "ά", + e = "έ", + h = "ή", + i = "ί", + o = "ό", + u = "ύ", + w = "ώ", + } + + local replace_20 = { -- ` * + a = "ὰ", + e = "ὲ", + h = "ὴ", + i = "ὶ", + o = "ὸ", + u = "ὺ", + w = "ὼ", + } + + local replace_21 = { -- ~ * + a = "ᾶ", + h = "ῆ", + i = "ῖ", + u = "ῦ", + w = "ῶ", + } + + local replace_22 = { -- < * + a = "ἁ", + e = "ἑ", + h = "ἡ", + i = "ἱ", + o = "ὁ", + u = "ὑ", + w = "ὡ", + r = "ῥ", + A = "Ἁ", + E = "Ἑ", + H = "Ἡ", + I = "Ἱ", + O = "Ὁ", + U = "Ὑ", + W = "Ὡ", + } + + local replace_23 = { -- > * + a = "ἀ", + e = "ἐ", + h = "ἠ", + i = "ἰ", + o = "ὀ", + u = "ὐ", + w = "ὠ", + A = "Ἀ", + E = "Ἐ", + H = "Ἠ", + I = "Ἰ", + O = "Ὀ", + U = "᾿Υ", + W = "Ὠ", + } + + local replace_24 = { -- * | + a = "ᾳ", + h = "ῃ", + w = "ῳ", + } + + local replace_25 = { -- " * + i = "ϊ", + u = "ϋ", + } + + local replace_26 = { -- * + a = "α", + b = "β", + g = "γ", + d = "δ", + e = "ε", + z = "ζ", + h = "η", + j = "θ", + i = "ι", + k = "κ", + l = "λ", + m = "μ", + n = "ν", + x = "ξ", + o = "ο", + p = "π", + r = "ρ", + s = "σ", + c = "ς", + t = "τ", + u = "υ", + f = "φ", + q = "χ", + y = "ψ", + w = "ω", + A = "Α", + B = "Β", + G = "Γ", + D = "Δ", + E = "Ε", + Z = "Ζ", + H = "Η", + J = "Θ", + I = "Ι", + K = "Κ", + L = "Λ", + M = "Μ", + N = "Ν", + X = "Ξ", + O = "Ο", + P = "Π", + R = "Ρ", + S = "Σ", + T = "Τ", + U = "Υ", + F = "Φ", + Q = "Χ", + Y = "Ψ", + W = "Ω" + } + + local skips_01 = 
lpeg.P("\\") * lpeg.R("az", "AZ")^1 + local skips_02 = lpeg.P("[") * (1- lpeg.S("[]"))^1 * lpeg.P("]") + + local stage_01 = (lpeg.P("<'") * lpeg.Cs(1) * lpeg.P('|')) / replace_01 + local stage_02 = (lpeg.P(">'") * lpeg.Cs(1) * lpeg.P('|')) / replace_02 + local stage_03 = (lpeg.P("<`") * lpeg.Cs(1) * lpeg.P('|')) / replace_03 + local stage_04 = (lpeg.P(">`") * lpeg.Cs(1) * lpeg.P('|')) / replace_04 + local stage_05 = (lpeg.P("<~") * lpeg.Cs(1) * lpeg.P('|')) / replace_05 + local stage_06 = (lpeg.P(">~") * lpeg.Cs(1) * lpeg.P('|')) / replace_06 + local stage_07 = (lpeg.P('"\'') * lpeg.Cs(1) ) / replace_07 + local stage_08 = (lpeg.P('"`') * lpeg.Cs(1) ) / replace_08 + local stage_09 = (lpeg.P('"~') * lpeg.Cs(1) ) / replace_09 + local stage_10 = (lpeg.P("<'") * lpeg.Cs(1) ) / replace_10 + local stage_11 = (lpeg.P(">'") * lpeg.Cs(1) ) / replace_11 + local stage_12 = (lpeg.P("<`") * lpeg.Cs(1) ) / replace_12 + local stage_13 = (lpeg.P(">`") * lpeg.Cs(1) ) / replace_13 + local stage_14 = (lpeg.P(">~") * lpeg.Cs(1) ) / replace_14 + local stage_15 = (lpeg.P(">~") * lpeg.Cs(1) ) / replace_15 + local stage_16 = (lpeg.P("'") * lpeg.Cs(1) * lpeg.P('|')) / replace_16 + local stage_17 = (lpeg.P("`") * lpeg.Cs(1) * lpeg.P('|')) / replace_17 + local stage_18 = (lpeg.P("~") * lpeg.Cs(1) * lpeg.P('|')) / replace_18 + local stage_19 = (lpeg.P("'") * lpeg.Cs(1) ) / replace_19 + local stage_20 = (lpeg.P("`") * lpeg.Cs(1) ) / replace_20 + local stage_21 = (lpeg.P("~") * lpeg.Cs(1) ) / replace_21 + local stage_22 = (lpeg.P("<") * lpeg.Cs(1) ) / replace_22 + local stage_23 = (lpeg.P(">") * lpeg.Cs(1) ) / replace_23 + local stage_24 = (lpeg.Cs(1) * lpeg.P('|') ) / replace_24 + local stage_25 = (lpeg.P('"') * lpeg.Cs(1) ) / replace_25 + local stage_26 = (lpeg.Cs(1) ) / replace_26 + + local stages = + skips_01 + skips_02 + + stage_01 + stage_02 + stage_03 + stage_04 + stage_05 + + stage_06 + stage_07 + stage_08 + stage_09 + stage_10 + + stage_11 + stage_12 + stage_13 + stage_14 + stage_15 + + 
stage_16 + stage_17 + stage_18 + stage_19 + stage_20 + + stage_21 + stage_22 + stage_23 + stage_24 + stage_25 + + stage_26 + + local parser = lpeg.Cs((stages + 1)^0) + + -- lpeg.print(parser): 254 lines + + function scripts.babel.convert(filename) + if filename and filename ~= empty then + local data = io.loaddata(filename) + if data then + data = parser:match(data) + io.savedata(filename .. ".utf", data) + end + end + end + +end + +banner = banner .. " | conversion tools " + +messages.help = [[ +--convert convert babel codes into utf +]] + +input.verbose = true + +if environment.argument("convert") then + scripts.babel.convert(environment.files[1] or "") +else + input.help(banner,messages.help) +end diff --git a/scripts/context/lua/mtx-chars.lua b/scripts/context/lua/mtx-chars.lua index 28d7b4a40..470846419 100644 --- a/scripts/context/lua/mtx-chars.lua +++ b/scripts/context/lua/mtx-chars.lua @@ -75,16 +75,97 @@ if not characters then characters = { } end end end +scripts.chars.banner_utf_1 = [[ +% filename : enco-utf.tex +% comment : generated by mtxrun --script chars --utf +% author : Hans Hagen, PRAGMA-ADE, Hasselt NL +% copyright: PRAGMA ADE / ConTeXt Development Team +% license : see context related readme files + +\ifx\setcclcucx\undefined + + \def\setcclcucx #1 #2 #3 % + {\global\catcode"#1=11 + \global\lccode "#1="#2 + \global\uccode "#1="#3 } + +\fi +]] + +scripts.chars.banner_utf_2 = [[ + +% lc/uc/catcode mappings + +]] + +scripts.chars.banner_utf_3 = [[ + +% named characters mapped onto utf + +]] + +scripts.chars.banner_utf_4 = [[ + +\endinput +]] + +function scripts.chars.makeencoutf() + local chartable = input.find_file(instance,"char-def.lua") or "" + if chartable ~= "" then + dofile(chartable) + if characters and characters.data then + local f = io.open("enco-utf.tex", 'w') + if f then + local char, format = unicode.utf8.char, string.format + f:write(scripts.chars.banner_utf_1) + f:write(scripts.chars.banner_utf_2) + local list = 
table.sortedkeys(characters.data) + local length = 0 + for i=1,#list do + local code = list[i] + if code <= 0xFFFF then + local chr = characters.data[code] + local cc = chr.category + if cc == 'll' or cc == 'lu' or cc == 'lt' then + if not chr.lccode then chr.lccode = code end + if not chr.uccode then chr.uccode = code end + f:write(format("\\setcclcucx %04X %04X %04X %% %s\n",code,chr.lccode,chr.uccode,chr.description)) + end + if #(chr.contextname or "") > length then + length = #chr.contextname + end + end + end + f:write(scripts.chars.banner_utf_3) + for i=1,#list do + local code = list[i] + if code > 0x7F and code <= 0xFFFF then + local chr = characters.data[code] + if chr.contextname then + f:write(format("\\def\\%s{%s} %% %s\n", chr.contextname:rpadd(length," "), char(code),chr.description)) + end + end + end + f:write(scripts.chars.banner_utf_4) + f:close() + end + end + end +end + banner = banner .. " | character tools " messages.help = [[ --stix convert stix table to math table +--utf generate enco-utf.tex (used by xetex) ]] if environment.argument("stix") then local inname = environment.files[1] or "" local outname = environment.files[2] or "" scripts.chars.stixtomkiv(inname,outname) +elseif environment.argument("utf") then + scripts.chars.makeencoutf() else input.help(banner,messages.help) end diff --git a/scripts/context/lua/mtxrun.lua b/scripts/context/lua/mtxrun.lua index 2a3a496a3..baad28e84 100644 --- a/scripts/context/lua/mtxrun.lua +++ b/scripts/context/lua/mtxrun.lua @@ -168,7 +168,7 @@ end --~ split = lpeg.Ct(c*(p*c)^0) --~ splitters[separator] = split --~ end ---~ return lpeg.match(split,self) +--~ return lpeg.match(split,self) -- split:match(self) --~ else --~ return { } --~ end @@ -325,7 +325,7 @@ end --~ return self .. self.rep(chr or " ",n-#self) --~ end -function string:padd(n,chr) +function string:rpadd(n,chr) local m = n-#self if m > 0 then return self .. 
self.rep(chr or " ",m) @@ -334,6 +334,17 @@ function string:padd(n,chr) end end +function string:lpadd(n,chr) + local m = n-#self + if m > 0 then + return self.rep(chr or " ",m) .. self + else + return self + end +end + +string.padd = string.rpadd + function is_number(str) return str:find("^[%-%+]?[%d]-%.?[%d+]$") == 1 end @@ -530,6 +541,8 @@ end do + -- one of my first exercises in lua ... + -- 34.055.092 32.403.326 arabtype.tma -- 1.620.614 1.513.863 lmroman10-italic.tma -- 1.325.585 1.233.044 lmroman10-regular.tma @@ -889,6 +902,25 @@ function table.tohash(t) return h end +function table.contains(t, v) + if t then + for i=1, #t do + if t[i] == v then + return true + end + end + end + return false +end + +function table.count(t) + local n, e = 0, next(t) + while e do + n, e = n + 1, next(t,e) + end + return n +end + --~ function table.are_equal(a,b) --~ return table.serialize(a) == table.serialize(b) --~ end @@ -1387,12 +1419,20 @@ function boolean.tonumber(b) if b then return 1 else return 0 end end -function toboolean(str) - if type(str) == "string" then - return str == "true" or str == "yes" or str == "on" or str == "1" - elseif type(str) == "number" then - return tonumber(str) ~= 0 - elseif type(str) == "nil" then +function toboolean(str,tolerant) + if tolerant then + if type(str) == "string" then + return str == "true" or str == "yes" or str == "on" or str == "1" + elseif type(str) == "number" then + return tonumber(str) ~= 0 + elseif type(str) == "nil" then + return false + else + return str + end + elseif str == "true" then + return true + elseif str == "false" then return false else return str @@ -1427,13 +1467,14 @@ if not modules then modules = { } end modules ['l-xml'] = { license = "see context related readme files" } --- todo: ns, tg = s:match("^(.-):?([^:]+)$") +-- RJ: key=value ... lpeg.Ca(lpeg.Cc({}) * (pattern-producing-key-and-value / rawset)^0) --[[ldx--

The parser used here is inspired by the variant discussed in the lua book, but handles comments and processing instructions, has a different structure, provides parent access; a first version used different trickery but was less optimized so we -went this route.

+went this route. First we had a find based parser, now we have an lpeg based one. +The find based parser can be found in l-xml-edu.lua along with other older code.

Especially the lpath code is experimental, we will support some of xpath, but only things that make sense for us; as compensation it is possible to hook in your @@ -1442,7 +1483,7 @@ this module for process management, like handling and files.

-a/b/c /*/c (todo: a/b/(pattern)/d) +a/b/c /*/c a/b/c/first() a/b/c/last() a/b/c/index(n) a/b/c/index(-n) a/b/c/text() a/b/c/text(1) a/b/c/text(-1) a/b/c/text(n) @@ -1457,48 +1498,86 @@ tex = tex or { } xml.trace_lpath = false xml.trace_print = false +xml.trace_remap = false --[[ldx-- -

First a hack to enable namespace resolving.

+

First a hack to enable namespace resolving. A namespace is characterized by +a url. The following function associates a namespace prefix with a +pattern. We use lpeg, which in this case is more than twice as fast as a +find based solution where we loop over an array of patterns. Less code and +much cleaner.

--ldx]]-- +xml.xmlns = { } + do - xml.xmlns = { } + local parser = lpeg.P(false) -- printing shows that this has no side effects + + --[[ldx-- +

The next function associates a namespace prefix with a pattern. This + normally happens independent of parsing.

- local data = { } + + xml.registerns("mml","mathml") + + --ldx]]-- - function xml.registerns(namespace,pattern) - data[#data+1] = { namespace:lower(), pattern:lower() } + function xml.registerns(namespace, pattern) -- pattern can be an lpeg + parser = parser + lpeg.C(lpeg.P(pattern:lower())) / namespace end + --[[ldx-- +

The next function also registers a namespace, but this time we map a + given namespace prefix onto a registered one, using the given + url. This is used for attributes like xmlns:m.

+ + + xml.checkns("m","http://www.w3.org/mathml") + + --ldx]]-- + function xml.checkns(namespace,url) - url = url:lower() - for i=1,#data do - local d = data[i] - if url:find(d[2]) then - if namespace ~= d[1] then - xml.xmlns[namespace] = d[1] - end - end + local ns = parser:match(url:lower()) + if ns and namespace ~= ns then + xml.xmlns[namespace] = ns end end + --[[ldx-- +

Next we provide a way to turn a url into a registered + namespace. This is used for the xmlns attribute.

+ + + resolvedns = xml.resolvens("http://www.w3.org/mathml") + + + This returns mml. + --ldx]]-- + function xml.resolvens(url) - url = url:lower() - for i=1,#data do - local d = data[i] - if url:find(d[2]) then - return d[1] - end - end - return "" + return parser:match(url:lower()) or "" end + --[[ldx-- +

A namespace in an element can be remapped onto the registered + one efficiently by using the xml.xmlns table.

+ --ldx]]-- + end --[[ldx-- -

Next comes the loader. The dreadful doctype comes in many disguises:

+

This version uses lpeg. We follow the same approach as before, stack and top and +such. This version is about twice as fast which is mostly due to the fact that +we don't have to prepare the stream for cdata, doctype etc etc. This variant +is dedicated to Luigi Scarso, who challenged me with 40 megabyte files that +took 12.5 seconds to load (1.5 for file io and the rest for tree building). With +the lpeg implementation we got that down to less than 7.3 seconds. Loading the 14 + interface definition files (2.6 meg) went down from 1.05 seconds to 0.55.

+ +

Next comes the parser. The rather messy doctype definition comes in many +disguises so it is no surprise that later on we have to dedicate quite some + lpeg code to it.

@@ -1508,320 +1587,466 @@ end + +

The code may look a bit complex but this is mostly due to the fact that we +resolve namespaces and attach metatables. There is only one public function:

+ + +local x = xml.convert(somestring) + + +

An optional second boolean argument tells this function not to create a root +element.

--ldx]]-- do - -- Loading 12 cont-*.xml and keys-*.xml files totaling to 2.62 MBytes takes 1.1 sec - -- on a windows vista laptop with dual core 7600 (2.3 Ghz), which is not that bad. - -- Of this half time is spent on doctype etc parsing. - - local doctype_patterns = { - "", - "", - "", - "", - "", - "" - } + local remove, nsremap = table.remove, xml.xmlns - -- We assume no "<" which is the lunatic part of the xml spec - -- especially since ">" is permitted; otherwise we need a char - -- by char parser ... more something for later ... normally - -- entities will be used anyway. + local stack, top, dt, at, xmlns, errorstr = {}, {}, {}, {}, {}, nil - -- data = data:gsub(nothing done) is still a copy so we find first + local mt = { __tostring = xml.text } - local function prepare(data,text) - -- pack (for backward compatibility) - if type(data) == "table" then - data = table.concat(data,"") - end - -- CDATA - if data:find("<%!%[CDATA%[") then - data = data:gsub("<%!%[CDATA%[(.-)%]%]>", function(txt) - text[#text+1] = txt or "" - return string.format("<@cd@>%s",#text) - end) - end - -- DOCTYPE - if data:find("%s",#text) - end) - end - end - return a .. b - end,1) + local function add_attribute(namespace,tag,value) + if tag == "xmlns" then + xmlns[#xmlns+1] = xml.resolvens(value) + at[tag] = value + elseif ns == "xmlns" then + xml.checkns(tag,value) + at["xmlns:" .. 
tag] = value + else + at[tag] = value end - -- comment / does not catch doctype - data = data:gsub("<%!%-%-(.-)%-%->", function(txt) - text[#text+1] = txt or "" - return string.format("<@cm@>%s",#text) - end) - -- processing instructions / altijd 1 - data = data:gsub("<%?(.-)%?>", function(txt) - text[#text+1] = txt or "" - return string.format("<@pi@>%s",#text) - end) - return data, text end + local function add_begin(spacing, namespace, tag) + if #spacing > 0 then + dt[#dt+1] = spacing + end + local resolved = (namespace == "" and xmlns[#xmlns]) or nsremap[namespace] or namespace + top = { ns=namespace or "", nr=resolved, tg=tag, at=at, dt={}, __p__ = stack[#stack] } + setmetatable(top, mt) + dt = top.dt + stack[#stack+1] = top + at = { } + end + local function add_end(spacing, namespace, tag) + if #spacing > 0 then + dt[#dt+1] = spacing + end + local toclose = remove(stack) + top = stack[#stack] + if #stack < 1 then + errorstr = string.format("nothing to close with %s", tag) + elseif toclose.tg ~= tag then -- no namespace check + errorstr = string.format("unable to close %s with %s", toclose.tg, tag) + end + dt = top.dt + dt[#dt+1] = toclose + if at.xmlns then + remove(xmlns) + end + end + local function add_empty(spacing, namespace, tag) + if #spacing > 0 then + dt[#dt+1] = spacing + end + local resolved = (namespace == "" and xmlns[#xmlns]) or nsremap[namespace] or namespace + top = stack[#stack] + setmetatable(top, mt) + dt = top.dt + dt[#dt+1] = { ns=namespace or "", nr=resolved, tg=tag, at=at, dt={}, __p__ = top } + at = { } + if at.xmlns then + remove(xmlns) + end + end + local function add_text(text) + dt[#dt+1] = text + end + local function add_special(what, spacing, text) + if #spacing > 0 then + dt[#dt+1] = spacing + end + top = stack[#stack] + setmetatable(top, mt) + dt[#dt+1] = { special=true, ns="", tg=what, dt={text} } + end + local function set_message(txt) + errorstr = "garbage at the end of the file: " .. 
txt:gsub("([ \n\r\t]*)","") + end + + local space = lpeg.S(' \r\n\t') + local open = lpeg.P('<') + local close = lpeg.P('>') + local squote = lpeg.S("'") + local dquote = lpeg.S('"') + local equal = lpeg.P('=') + local slash = lpeg.P('/') + local colon = lpeg.P(':') + local valid = lpeg.R('az', 'AZ', '09') + lpeg.S('_-.') + local name_yes = lpeg.C(valid^1) * colon * lpeg.C(valid^1) + local name_nop = lpeg.C(lpeg.P(true)) * lpeg.C(valid^1) + local name = name_yes + name_nop + + local utfbom = lpeg.P('\000\000\254\255') + lpeg.P('\255\254\000\000') + + lpeg.P('\255\254') + lpeg.P('\254\255') + lpeg.P('\239\187\191') -- no capture + + local spacing = lpeg.C(space^0) + local justtext = lpeg.C((1-open)^1) + local somespace = space^1 + local optionalspace = space^0 + + local value = (squote * lpeg.C((1 - squote)^0) * squote) + (dquote * lpeg.C((1 - dquote)^0) * dquote) + local attribute = (somespace * name * optionalspace * equal * optionalspace * value) / add_attribute + local attributes = attribute^0 + + local text = justtext / add_text + local balanced = lpeg.P { "[" * ((1 - lpeg.S"[]") + lpeg.V(1))^0 * "]" } -- taken from lpeg manual, () example + + local emptyelement = (spacing * open * name * attributes * optionalspace * slash * close) / add_empty + local beginelement = (spacing * open * name * attributes * optionalspace * close) / add_begin + local endelement = (spacing * open * slash * name * optionalspace * close) / add_end + + local begincomment = open * lpeg.P("!--") + local endcomment = lpeg.P("--") * close + local begininstruction = open * lpeg.P("?") + local endinstruction = lpeg.P("?") * close + local begincdata = open * lpeg.P("![CDATA[") + local endcdata = lpeg.P("]]") * close + + local someinstruction = lpeg.C((1 - endinstruction)^0) + local somecomment = lpeg.C((1 - endcomment )^0) + local somecdata = lpeg.C((1 - endcdata )^0) + + local begindoctype = open * lpeg.P("!DOCTYPE") + local enddoctype = close + local publicdoctype = lpeg.P("PUBLIC") * 
somespace * value * somespace * value * somespace * balanced^0 + local systemdoctype = lpeg.P("SYSTEM") * somespace * value * somespace * balanced^0 + local simpledoctype = (1-close)^1 * balanced^0 + local somedoctype = lpeg.C((somespace * lpeg.P(publicdoctype + systemdoctype + simpledoctype) * optionalspace)^0) + + local instruction = (spacing * begininstruction * someinstruction * endinstruction) / function(...) add_special("@pi@",...) end + local comment = (spacing * begincomment * somecomment * endcomment ) / function(...) add_special("@cm@",...) end + local cdata = (spacing * begincdata * somecdata * endcdata ) / function(...) add_special("@cd@",...) end + local doctype = (spacing * begindoctype * somedoctype * enddoctype ) / function(...) add_special("@dd@",...) end + + -- nicer but slower: + -- + -- local instruction = (lpeg.Cc("@pi@") * spacing * begininstruction * someinstruction * endinstruction) / add_special + -- local comment = (lpeg.Cc("@cm@") * spacing * begincomment * somecomment * endcomment ) / add_special + -- local cdata = (lpeg.Cc("@cd@") * spacing * begincdata * somecdata * endcdata ) / add_special + -- local doctype = (lpeg.Cc("@dd@") * spacing * begindoctype * somedoctype * enddoctype ) / add_special + + local trailer = space^0 * (justtext/set_message)^0 + + -- comment + emptyelement + text + cdata + instruction + lpeg.V("parent"), -- 6.5 seconds on 40 MB database file + -- text + comment + emptyelement + cdata + instruction + lpeg.V("parent"), -- 5.8 + -- text + lpeg.V("parent") + emptyelement + comment + cdata + instruction, -- 5.5 + - -- maybe we will move the @tg@ stuff to a dedicated key, say 'st'; this will speed up - -- serializing and testing + local grammar = lpeg.P { "preamble", + preamble = utfbom^0 * instruction^0 * (doctype + comment + instruction)^0 * lpeg.V("parent") * trailer, + parent = beginelement * lpeg.V("children")^0 * endelement, + children = text + lpeg.V("parent") + emptyelement + comment + cdata + instruction, + } - 
function xml.convert(data,no_root,collapse) - local crap = { } - data, crap = prepare(data, crap) - local nsremap = xml.xmlns - local remove = table.remove - local stack, top = {}, {} - local i, j, errorstr = 1, 1, nil + function xml.convert(data, no_root) -- no collapse any more + stack, top, at, xmlns, errorstr, result = {}, {}, {}, {}, nil, nil stack[#stack+1] = top top.dt = { } - local dt = top.dt - local id = 0 - local namespaces = { } - local mt = { __tostring = xml.text } - while true do - local ni, first, attributes, last, fulltag - ni, j, first, fulltag, attributes, last = data:find("<(/-)([^%s%>/]+)%s*([^>]-)%s*(/-)>", j) - if not ni then break end - local namespace, tag = fulltag:match("^(.-):(.+)$") - if attributes ~= "" then - local t = {} - for ns, tag, _, value in attributes:gmatch("(%w-):?(%w+)=([\"\'])(.-)%3") do - if tag == "xmlns" then -- not ok yet - namespaces[#stack] = xml.resolvens(value) - elseif ns == "" then - t[tag] = value - elseif ns == "xmlns" then - xml.checkns(tag,value) - else - t[tag] = value - end - end - attributes = t - else - attributes = { } - end - if namespace then -- realtime remapping - namespace = nsremap[namespace] or namespace - else - namespace, tag = namespaces[#stack] or "", fulltag - end - local text = data:sub(i, ni-1) - if text == "" or (collapse and text:find("^%s*$")) then - -- no need for empty text nodes, beware, also packs x y z - -- so is not that useful unless used with empty elements - else - dt[#dt+1] = text - end - if first == "/" then - -- end tag - local toclose = remove(stack) -- remove top - top = stack[#stack] - namespaces[#stack] = nil - if #stack < 1 then - errorstr = string.format("nothing to close with %s", tag) - break - elseif toclose.tg ~= tag then -- no namespace check - errorstr = string.format("unable to close %s with %s", toclose.tg, tag) - break - end - if tag:find("^@..@$") then - dt[1] = crap[tonumber(dt[1])] or "" - end - dt = top.dt - dt[#dt+1] = toclose - elseif last == "/" then - 
-- empty element tag - dt[#dt+1] = { ns = namespace, tg = tag, dt = { }, at = attributes, __p__ = top } - -- setmetatable(top, { __tostring = xml.text }) - setmetatable(top, mt) - else - -- begin tag - top = { ns = namespace, tg = tag, dt = { }, at = attributes, __p__ = stack[#stack] } - -- setmetatable(top, { __tostring = xml.text }) - setmetatable(top, mt) - dt = top.dt - stack[#stack+1] = top - end - i = j + 1 - end - if not errorstr then - local text = data:sub(i) - if dt and not text:find("^%s*$") then - dt[#dt+1] = text - end - if #stack > 1 then - errorstr = string.format("unclosed %s", stack[#stack].tg) - end + dt = top.dt + if not data or data == "" then + errorstr = "empty xml file" + elseif not grammar:match(data) then + errorstr = "invalid xml file" end if errorstr then - stack = { { tg = "error", dt = { errorstr } } } - -- setmetatable(stack, { __tostring = xml.text }) + result = { dt = { { ns = "", tg = "error", dt = { errorstr }, at={} } } } setmetatable(stack, mt) - end - if no_root then - return stack[1] + if xml.error_handler then xml.error_handler("load",errorstr) end else - local t = { ns = "", tg = '@rt@', dt = stack[1].dt } - -- setmetatable(t, { __tostring = xml.text }) - setmetatable(t, mt) - for k,v in ipairs(t.dt) do - if type(v) == "table" and v.tg ~= "@pi@" and v.tg ~= "@dd@" and v.tg ~= "@cm@" then - t.ri = k -- rootindex + result = stack[1] + end + if not no_root then + result = { special = true, ns = "", tg = '@rt@', dt = result.dt, at={} } + setmetatable(result, mt) + for k,v in ipairs(result.dt) do + if type(v) == "table" and not v.special then -- always table -) + result.ri = k -- rootindex break end end - return t end + return result end - function xml.copy(old,tables,parent) -- fast one - tables = tables or { } - if old then - local new = { } - if not table[old] then - table[old] = new - end - for i,v in pairs(old) do - -- new[i] = (type(v) == "table" and (table[v] or xml.copy(v, tables, table))) or v - if type(v) == "table" then 
- new[i] = table[v] or xml.copy(v, tables, table) - else - new[i] = v - end - end - local mt = getmetatable(old) - if mt then - setmetatable(new,mt) - end - return new - else - return { } - end + --[[ldx-- +

Packaging data in an xml like table is done with the following + function. Maybe it will go away (when not used).

+ --ldx]]-- + + function xml.package(tag,attributes,data) + local ns, tg = tag:match("^(.-):?([^:]+)$") + local t = { ns = ns, tg = tg, dt = data or "", at = attributes or {} } + setmetatable(t, mt) + return t end + xml.error_handler = (logs and logs.report) or print + end -function xml.load(filename,collapse) +--[[ldx-- +

We cannot apply an lpeg grammar to a file handle so we need to load +the whole file first. The function accepts a string representing +a filename or a file handle.

+--ldx]]-- + +function xml.load(filename) if type(filename) == "string" then - local root, f = { }, io.open(filename,'r') -- no longer 'rb' + local root, f = { }, io.open(filename,'r') if f then - root = xml.convert(f:read("*all"),false,collapse) + root = xml.convert(f:read("*all")) f:close() + else + -- if we want an error: root = xml.convert("") end - return root + return root -- no nil but an empty table if it fails else - return xml.convert(filename:read("*all"),false,collapse) + return xml.convert(filename:read("*all")) end end -function xml.root(root) - return (root.ri and root.dt[root.ri]) or root +--[[ldx-- +

When we inject new elements, we need to convert strings to +valid trees, which is what the next function does.

+--ldx]]-- + +function xml.toxml(data) + if type(data) == "string" then + local root = { xml.convert(data,true) } + return (#root > 1 and root) or root[1] + else + return data + end end -function xml.toxml(data,collapse) - local t = { xml.convert(data,true,collapse) } - if #t > 1 then - return t +--[[ldx-- +

For copying a tree we use a dedicated function instead of the +generic table copier. Since we know what we're dealing with we +can speed up things a bit. The second argument is not to be used!

+--ldx]]-- + +function xml.copy(old,tables) + if old then + tables = tables or { } + local new = { } + if not tables[old] then + tables[old] = new + end + for k,v in pairs(old) do + new[k] = (type(v) == "table" and (tables[v] or xml.copy(v, tables))) or v + end + local mt = getmetatable(old) + if mt then + setmetatable(new,mt) + end + return new else - return t[1] + return { } end end -function xml.serialize(e, handle, textconverter, attributeconverter) - handle = handle or (tex and tex.sprint) or io.write - if not e then - -- quit - elseif e.command and xml.command then -- test for command == "" ? - xml.command(e) - elseif e.tg then - local format, serialize = string.format, xml.serialize - local ens, etg, eat, edt = e.ns, e.tg, e.at, e.dt - -- no spaces, so no flush needed (check) - if etg == "@pi@" then - handle(format("",edt[1])) - elseif etg == "@cm@" then - handle(format("",edt[1])) - elseif etg == "@cd@" then - handle(format("",edt[1])) - elseif etg == "@dd@" then - handle(format("",edt[1])) - elseif etg == "@rt@" then - serialize(edt,handle,textconverter,attributeconverter) +--[[ldx-- +

Serializing the tree or parts of the tree is a major +activity, which is why the following function is pretty optimized, resulting +in a few more lines of code than needed. The variant that uses the formatting +function for all components is about 15% slower than the concatenating +alternative.

+--ldx]]-- + +do + + -- todo: add when not present + + local fallbackhandle = (tex and tex.sprint) or io.write + + function xml.serialize(e, handle, textconverter, attributeconverter, specialconverter, nocommands) + if not e then + -- quit + elseif not nocommands and e.command and xml.command then + xml.command(e) else - local ats = eat and next(eat) and { } - if ats then - if attributeconverter then - for k,v in pairs(eat) do - ats[#ats+1] = format('%s=%q',k,attributeconverter(v)) + handle = handle or fallbackhandle + local etg = e.tg + if etg then + -- local format = string.format + if e.special then + local edt = e.dt + local spc = specialconverter and specialconverter[etg] + if spc then + local result = spc(edt[1]) + if result then + handle(result) + else + -- no need to handle any further + end + elseif etg == "@pi@" then + -- handle(format("",edt[1])) + handle("") -- maybe table.join(edt) + elseif etg == "@cm@" then + -- handle(format("",edt[1])) + handle("") + elseif etg == "@cd@" then + -- handle(format("",edt[1])) + handle("") + elseif etg == "@dd@" then + -- handle(format("",edt[1])) + handle("") + elseif etg == "@rt@" then + xml.serialize(edt,handle,textconverter,attributeconverter,specialconverter,nocommands) end else - for k,v in pairs(eat) do - ats[#ats+1] = format('%s=%q',k,v) - end - end - end - if ens ~= "" then - if edt and #edt > 0 then + local ens, eat, edt, ern = e.ns, e.at, e.dt, e.rn + local ats = eat and next(eat) and { } if ats then - handle(format("<%s:%s %s>",ens,etg,table.concat(ats," "))) - else - handle(format("<%s:%s>",ens,etg)) + local format = string.format + if attributeconverter then + for k,v in pairs(eat) do + ats[#ats+1] = format('%s=%q',k,attributeconverter(v)) + end + else + for k,v in pairs(eat) do + ats[#ats+1] = format('%s=%q',k,v) + end + end end - for i=1,#edt do - serialize(edt[i],handle,textconverter,attributeconverter) + if ern and xml.trace_remap then + if ats then + ats[#ats+1] = 
string.format("xmlns:remapped='%s'",ern) + else + ats = { string.format("xmlns:remapped='%s'",ern) } + end end - handle(format("",ens,etg)) - else - if ats then - handle(format("<%s:%s %s/>",ens,etg,table.concat(ats," "))) + if ens ~= "" then + if edt and #edt > 0 then + if ats then + -- handle(format("<%s:%s %s>",ens,etg,table.concat(ats," "))) + handle("<" .. ens .. ":" .. etg .. " " .. table.concat(ats," ") .. ">") + else + -- handle(format("<%s:%s>",ens,etg)) + handle("<" .. ens .. ":" .. etg .. ">") + end + local serialize = xml.serialize + for i=1,#edt do + local e = edt[i] + if type(e) == "string" then + if textconverter then + handle(textconverter(e)) + else + handle(e) + end + else + serialize(e,handle,textconverter,attributeconverter,specialconverter,nocommands) + end + end + -- handle(format("",ens,etg)) + handle("") + else + if ats then + -- handle(format("<%s:%s %s/>",ens,etg,table.concat(ats," "))) + handle("<%" .. ens .. ":" .. etg .. table.concat(ats," ") .. "/>") + else + -- handle(format("<%s:%s/>",ens,etg)) + handle("<%" .. ens .. ":" .. "/>") + end + end else - handle(format("<%s:%s/>",ens,etg)) + if edt and #edt > 0 then + if ats then + -- handle(format("<%s %s>",etg,table.concat(ats," "))) + handle("<" .. etg .. " " .. table.concat(ats," ") .. ">") + else + -- handle(format("<%s>",etg)) + handle("<" .. etg .. ">") + end + local serialize = xml.serialize + for i=1,#edt do + serialize(edt[i],handle,textconverter,attributeconverter,specialconverter,nocommands) + end + -- handle(format("",etg)) + handle("") + else + if ats then + -- handle(format("<%s %s/>",etg,table.concat(ats," "))) + handle("<" .. etg .. table.concat(ats," ") .. "/>") + else + -- handle(format("<%s/>",etg)) + handle("<" .. etg .. 
"/>") + end + end end end - else - if edt and #edt > 0 then - if ats then - handle(format("<%s %s>",etg,table.concat(ats," "))) - else - handle(format("<%s>",etg)) - end - for i=1,#edt do - serialize(edt[i],handle,textconverter,attributeconverter) - end - handle(format("",etg)) + elseif type(e) == "string" then + if textconverter then + handle(textconverter(e)) else - if ats then - handle(format("<%s %s/>",etg,table.concat(ats," "))) - else - handle(format("<%s/>",etg)) - end + handle(e) + end + else + local serialize = xml.serialize + for i=1,#e do + serialize(e[i],handle,textconverter,attributeconverter,specialconverter,nocommands) end end end - elseif type(e) == "string" then - if textconverter then - handle(textconverter(e)) - else - handle(e) - end - else - for i=1,#e do - xml.serialize(e[i],handle,textconverter,attributeconverter) + end + + function xml.checkbom(root) + if root.ri then + local dt, found = root.dt, false + for k,v in ipairs(dt) do + if type(v) == "table" and v.special and v.tg == "@pi" and v.dt:find("xml.*version=") then + found = true + break + end + end + if not found then + table.insert(dt, 1, { special=true, ns="", tg="@pi@", dt = { "xml version='1.0' standalone='yes'"} } ) + table.insert(dt, 2, "\n" ) + end end end + +end + +--[[ldx-- +

At the cost of some 25% runtime overhead you can first convert the tree to a string +and then handle the lot.

+--ldx]]-- + +function xml.tostring(root) -- 25% overhead due to collecting + if root then + if type(root) == 'string' then + return root + elseif next(root) then + local result = { } + xml.serialize(root,function(s) result[#result+1] = s end) + return table.concat(result,"") + end +end + return "" end -function xml.string(e,handle) -- weird one that may become obsolete - if e.tg then +--[[ldx-- +

The next function operates on the content only and needs a handle function +that accepts a string.

+--ldx]]-- + +function xml.string(e,handle) + if not handle or (e.special and e.tg ~= "@rt@") then + -- nothing + elseif e.tg then local edt = e.dt if edt then for i=1,#edt do @@ -1833,6 +2058,21 @@ function xml.string(e,handle) -- weird one that may become obsolete end end +--[[ldx-- +

How you deal with saving data depends on your preferences. For a 40 MB database +file the timings on a 2.3 Core Duo are as follows (time in seconds):

+ + +1.3 : load data from file to string +6.1 : convert string into tree +5.3 : saving in file using xml.save +6.8 : converting to string using xml.tostring +3.6 : saving converted string in file + + +

The save function is given below.

+--ldx]]-- + function xml.save(root,name) local f = io.open(name,"w") if f then @@ -1841,535 +2081,67 @@ function xml.save(root,name) end end -function xml.stringify(root) - if root then - if type(root) == 'string' then - return root - elseif next(root) then - local result = { } - xml.serialize(root,function(s) result[#result+1] = s end) - return table.concat(result,"") - end - end - return "" -end - -xml.tostring = xml.stringify - -do - - -- print - - local newline = lpeg.P("\n") - local space = lpeg.P(" ") - local content = lpeg.C((1-newline)^1) - - if tex then - - -- taco: we need a kind of raw print into tex, i.e. embedded \n's become lineendings - -- for tex and an empty line a par; could be a c-wrapper around existing stuff; i - -- played a lot with tex.print but that does not work ok (should be obeylines save) - - local buffer = {} +--[[ldx-- +

A few helpers:

+--ldx]]-- - local function cprint(s) - buffer[#buffer+1] = s - end - local function nprint( ) - if #buffer > 0 then - if xml.trace_print then - texio.write_nl(string.format("tex.print : [[[%s]]]", table.join(buffer))) - end - tex.print(table.join(buffer)) - buffer = {} - else - if xml.trace_print then - texio.write_nl(string.format("tex.print : [[[%s]]]", "")) - end - tex.print("") - end - end - local function fprint() - if #buffer > 0 then - if xml.trace_print then - texio.write_nl(string.format("tex.sprint: [[[%s]]]", table.join(buffer))) - end - tex.sprint(table.join(buffer)) - buffer = { } - end - end +function xml.body(root) + return (root.ri and root.dt[root.ri]) or root +end - local line_n = newline / nprint - local line_c = content / cprint - local capture = (line_n + line_c)^0 +function xml.text(root) + return (root and xml.tostring(root)) or "" +end - local function sprint(root) - if not root then - -- quit - elseif type(root) == 'string' then - lpeg.match(capture,root) - elseif next(root) then - xml.serialize(root, sprint, nil, nil, true) - end - end +function xml.content(root) + return (root and root.dt and xml.tostring(root.dt)) or "" +end - function xml.sprint(root) - buffer = {} - sprint(root) - if #buffer > 0 then - nprint() - end - end +--[[ldx-- +

The next helper erases an element but keeps the table as it is, +and since empty strings are not serialized (effectively) it does +no harm. Copying the table would take more time. Usage:

- xml.sflush = fprint + +dt[k] = xml.empty() or xml.empty(dt,k) + +--ldx]]-- +function xml.empty(dt,k) + if dt and k then + dt[k] = "" + return dt[k] else + return "" + end +end - function xml.sprint(root) - if not root then - -- quit - elseif type(root) == 'string' then - print(root) - elseif next(root) then - xml.serialize(root, xml.sprint, nil, nil, true) - end - end +--[[ldx-- +

The next helper assigns a tree (or string). Usage:

- end + +dt[k] = xml.assign(root) or xml.assign(dt,k,root) + +--ldx]]-- - function xml.tprint(root) - if type(root) == "table" then - for i=1,#root do - xml.sprint(root[i]) - end - elseif type(root) == "string" then - xml.sprint(root) - end +function xml.assign(dt,k,root) + if dt and k then + dt[k] = (type(root) == "table" and xml.body(root)) or root + return dt[k] + else + return xml.body(root) end +end - -- lines (looks hackery, but we cannot pass variables in capture functions) +--[[ldx-- +

We've now arrived at an interesting part: accessing the tree using a subset +of xpath and since we're not compatible we call it lpath. We +will explain more about its usage in other documents.

+--ldx]]-- - local buffer, flush = {}, nil +do - local function cprint(s) - buffer[#buffer+1] = s - end - local function nprint() - flush() - end - - local line_n = newline / nprint - local line_c = content / cprint - local capture = (line_n + line_c)^0 - - function lines(root) - if not root then - -- quit - elseif type(root) == 'string' then - lpeg.match(capture,root) - elseif next(root) then - xml.serialize(root, lines) - end - end - - function xml.lines(root) - local result = { } - flush = function() - result[#result+1] = table.join(buffer) - buffer = { } - end - buffer = {} - lines(root) - if #buffer > 0 then - result[#result+1] = table.join(buffer) - end - return result - end - -end - -function xml.text(root) - return (root and xml.stringify(root)) or "" -end - -function xml.content(root) - return (root and root.dt and xml.tostring(root.dt)) or "" -end - -function xml.body(t) -- removes initial pi - if t and t.dt and t.tg == "@rt@" then - for k,v in ipairs(t.dt) do - if type(v) == "table" and v.tg ~= "@pi@" then - return v - end - end - end - return t -end - --- call: e[k] = xml.empty() or xml.empty(e,k) - -function xml.empty(e,k) -- erases an element but keeps the table intact - if e and k then - e[k] = "" - return e[k] - else - return "" - end -end - --- call: e[k] = xml.assign(t) or xml.assign(e,k,t) - -function xml.assign(e,k,t) -- assigns xml tree / more testing will be done - if e and k then - if type(t) == "table" then - e[k] = xml.body(t) - else - e[k] = t -- no parsing - end - return e[k] - else - return xml.body(t) - end -end - --- 0=nomatch 1=match 2=wildcard 3=ancestor - --- "tag" --- "tag1/tag2/tag3" --- "*/tag1/tag2/tag3" --- "/tag1/tag2/tag3" --- "/tag1/tag2|tag3" --- "tag[@att='value'] --- "tag1|tag2[@att='value'] - -function xml.tag(e) - return e.tg or "" -end - -function xml.att(e,a) - return (e.at and e.at[a]) or "" -end - -xml.attribute = xml.att - ---~ local cache = { } - ---~ local function f_fault ( ) return 0 end ---~ local function 
f_wildcard( ) return 2 end ---~ local function f_result (b) if b then return 1 else return 0 end end - ---~ function xml.lpath(str) --maybe @rt@ special ---~ if not str or str == "" then ---~ str = "*" ---~ end ---~ local m = cache[str] ---~ if not m then ---~ -- todo: text() ---~ if type(str) == "table" then ---~ if xml.trace_lpath then print("lpath", "table" , "inherit") end ---~ m = str ---~ elseif str == "/" then ---~ if xml.trace_lpath then print("lpath", "/", "root") end ---~ m = false ---~ elseif str == "*" then ---~ if xml.trace_lpath then print("lpath", "no string or *", "wildcard") end ---~ m = true ---~ else ---~ str = str:gsub("^//","") -- any ---~ if str == "" then ---~ if xml.trace_lpath then print("lpath", "//", "wildcard") end ---~ m = true ---~ else ---~ m = { } ---~ if not str:find("^/") then ---~ m[1] = 2 ---~ end ---~ for v in str:gmatch("([^/]+)") do ---~ if v == "" or v == "*" then ---~ if #m > 0 then -- when not, then we get problems with root being second (after (we could start at dt[2]) ---~ if xml.trace_lpath then print("lpath", "empty or *", "wildcard") end ---~ m[#m+1] = 2 ---~ end ---~ elseif v == ".." then ---~ if xml.trace_lpath then print("lpath", "..", "ancestor") end ---~ m[#m+1] = 3 ---~ else ---~ local a, b = v:match("^(.+)::(.-)$") ---~ if a and b then ---~ if a == "ancestor" then ---~ if xml.trace_lpath then print("lpath", a, "ancestor") end ---~ m[#m+1] = 3 ---~ -- todo: b ---~ elseif a == "pi" then ---~ if xml.trace_lpath then print("lpath", a, "processing instruction") end ---~ local expr = "^" .. b .. 
" " ---~ m[#m+1] = function(e) ---~ if e.tg == '@pi@' and e.dt[1]:find(expr) then ---~ return 6 ---~ else ---~ return 0 ---~ end ---~ end ---~ end ---~ else ---~ local n, a, t = v:match("^(.-)%[@(.-)=(.-)%]$") ---~ if n and a and t then ---~ -- todo: namespace, negate ---~ -- t = t:gsub("^\'(.*)\'$", "%1") ---~ -- t = t:gsub("^\"(.*)\"$", "%1") ---~ -- t = t:sub(2,-2) -- "" or '' mandate ---~ t = t:gsub("^([\'\"])(.-)%1$", "%2") ---~ if n:find("|") then ---~ local tt = n:split("|") ---~ if xml.trace_lpath then print("lpath", "match", t, n) end ---~ m[#m+1] = function(e,i) ---~ for i=1,#tt do ---~ if e.at and e.tg == tt[i] and e.at[a] == t then return 1 end ---~ end ---~ return 0 ---~ end ---~ else ---~ if xml.trace_lpath then print("lpath", "match", t, n) end ---~ m[#m+1] = function(e) ---~ if e.at and e.ns == s and e.tg == n and e.at[a] == t then ---~ return 1 ---~ else ---~ return 0 ---~ end ---~ end ---~ end ---~ else -- todo, better tracing (string.format, ook negate etc) ---~ local negate = v:sub(1,1) == '^' ---~ if negate then v = v:sub(2) end ---~ if v:find("|") then ---~ local t = { } ---~ for s in v:gmatch("([^|]+)") do ---~ local ns, tg = s:match("^(.-):(.+)$") ---~ if tg == "*" then ---~ if xml.trace_lpath then print("lpath", "or wildcard", ns, tg) end ---~ t[#t+1] = function(e) return e.ns == ns end ---~ elseif tg then ---~ if xml.trace_lpath then print("lpath", "or match", ns, tg) end ---~ t[#t+1] = function(e) return e.ns == ns and e.tg == tg end ---~ else ---~ if xml.trace_lpath then print("lpath", "or match", s) end ---~ t[#t+1] = function(e) return e.ns == "" and e.tg == s end ---~ end ---~ end ---~ if negate then ---~ m[#m+1] = function(e) ---~ for i=1,#t do if t[i](e) then return 0 end end return 1 ---~ end ---~ else ---~ m[#m+1] = function(e) ---~ for i=1,#t do if t[i](e) then return 1 end end return 0 ---~ end ---~ end ---~ else ---~ if xml.trace_lpath then print("lpath", "match", v) end ---~ local ns, tg = v:match("^(.-):(.+)$") ---~ if not tg 
then ns, tg = "", v end ---~ if tg == "*" then ---~ if ns ~= "" then ---~ m[#m+1] = function(e) ---~ if ns == e.ns then return 1 else return 0 end ---~ end ---~ end ---~ elseif negate then ---~ m[#m+1] = function(e) ---~ if ns == e.ns and tg == e.tg then return 0 else return 1 end ---~ end ---~ else ---~ m[#m+1] = function(e) ---~ if ns == e.ns and tg == e.tg then return 1 else return 0 end ---~ end ---~ end ---~ end ---~ end ---~ end ---~ end ---~ end ---~ end ---~ end ---~ if xml.trace_lpath then ---~ print("# lpath criteria:", (type(m) == "table" and #m) or "none") ---~ end ---~ cache[str] = m ---~ end ---~ return m ---~ end - ---~ -- if handle returns true, then quit - ---~ function xml.traverse(root,pattern,handle,reverse,index,wildcard) ---~ if not root then -- error ---~ return false ---~ elseif pattern == false then -- root ---~ handle(root,root.dt,root.ri) ---~ return false ---~ elseif pattern == true then -- wildcard ---~ local traverse = xml.traverse ---~ local rootdt = root.dt ---~ if rootdt then ---~ local start, stop, step = 1, #rootdt, 1 ---~ if reverse then ---~ start, stop, step = stop, start, -1 ---~ end ---~ for k=start,stop,step do ---~ if handle(root,rootdt,root.ri or k) then return false end ---~ if not traverse(rootdt[k],true,handle,reverse) then return false end ---~ end ---~ end ---~ return false ---~ elseif root and root.dt then ---~ index = index or 1 ---~ local match = pattern[index] or f_wildcard ---~ local traverse = xml.traverse ---~ local rootdt = root.dt ---~ local start, stop, step = 1, #rootdt, 1 ---~ if reverse and index == #pattern then -- maybe no index test here / error? 
---~ start, stop, step = stop, start, -1 ---~ end ---~ for k=start,stop,step do ---~ local e = rootdt[k] ---~ if e.tg then ---~ local m = (type(match) == "function" and match(e,root)) or match ---~ if m == 1 then -- match ---~ if index < #pattern then ---~ if not traverse(e,pattern,handle,reverse,index+1) then return false end ---~ else ---~ if handle(root,rootdt,root.ri or k) then ---~ return false ---~ end ---~ -- tricky, where do we pick up, is this ok now ---~ if pattern[1] == 2 then -- start again with new root (we need a way to inhibit this) ---~ if not traverse(e,pattern,handle,reverse,1) then return false end ---~ end ---~ end ---~ elseif m == 2 then -- wildcard ---~ if index < #pattern then ---~ -- : "a" (true) "/a" (true) "b" (true) "/b" (false) ---~ -- not good yet, we need to pick up any prev level which is 2 ---~ local p = pattern[2] ---~ if index == 1 and p then ---~ local mm = (type(p) == "function" and p(e,root)) or p -- pattern[2](e,root) ---~ if mm == 1 then ---~ if #pattern == 2 then ---~ if handle(root,rootdt,k) then ---~ return false ---~ end ---~ -- hack ---~ if pattern[1] == 2 then -- start again with new root (we need a way to inhibit this) ---~ if not traverse(e,pattern,handle,reverse,1) then return false end ---~ end ---~ else ---~ if not traverse(e,pattern,handle,reverse,3) then return false end ---~ end ---~ else ---~ if not traverse(e,pattern,handle,reverse,index+1,true) then return false end ---~ end ---~ else ---~ if not traverse(e,pattern,handle,reverse,index+1,true) then return false end ---~ end ---~ elseif handle(root,rootdt,k) then ---~ return false ---~ end ---~ elseif m == 3 then -- ancestor ---~ local ep = e.__p__ ---~ if index < #pattern then ---~ if not traverse(ep,pattern,handle,reverse,index+1) then return false end ---~ elseif handle(root,rootdt,k) then ---~ return false ---~ end ---~ elseif m == 4 then -- just root ---~ if handle(root,rootdt,k) then ---~ return false ---~ end ---~ elseif m == 6 then -- pi ---~ if 
handle(root,rootdt,k) then ---~ return false ---~ end ---~ elseif wildcard then -- maybe two kind of wildcards: * ** // ---~ if not traverse(e,pattern,handle,reverse,index,wildcard) then return false end ---~ end ---~ end ---~ end ---~ end ---~ return true ---~ end - ---~ Y a/b ---~ Y /a/b ---~ Y a/*/b ---~ Y a//b ---~ Y child:: ---~ Y .// ---~ Y .. ---~ N id("tag") ---~ Y parent:: ---~ Y child:: ---~ N preceding-sibling:: (same name) ---~ N following-sibling:: (same name) ---~ N preceding-sibling-of-self:: (same name) ---~ N following-sibling-or-self:: (same name) ---~ Y ancestor:: ---~ N descendent:: ---~ N preceding:: ---~ N following:: ---~ N self::node() ---~ N node() == alles ---~ N a[position()=5] ---~ Y a[5] ---~ Y a[-5] ---~ N a[first()] ---~ N a[last()] ---~ Y a/(b|c|d)/e/f ---~ N (c/d|e) ---~ Y a/b[@bla] ---~ Y a/b[@bla='oeps'] ---~ Y a/b[@bla=='oeps'] ---~ Y a/b[@bla<>'oeps'] ---~ Y a/b[@bla!='oeps'] ---~ Y a/b/@bla - ---~ Y ^/a/c (root) ---~ Y ^^/a/c (docroot) ---~ Y root::a/c (docroot) - ---~ no wild card functions (yet) - ---~ s = "/a//b/*/(c|d|e)/(f|g)/h[4]/h/child::i/j/(a/b)/p[-1]/q[4]/ancestor::q/r/../s/./t[@bla='true']/k" - --- // == /**/ --- / = ^ (root) - -do - - function analyze(str) - if not str then - return "" - else - local tmp, result, map, key = { }, { }, { }, str - str = str:gsub("(%b[])", function(s) tmp[#tmp+1] = s return '[['..#tmp..']]' end) - str = str:gsub("(%b())", function(s) tmp[#tmp+1] = s return '[['..#tmp..']]' end) - str = str:gsub("(%^+)([^/])", "%1/%2") - str = str:gsub("//+", "/**/") - str = str:gsub(".*root::", "^/") - str = str:gsub("child::", "") - str = str:gsub("ancestor::", "../") - str = str:gsub("self::", "./") - str = str:gsub("^/", "^/") - for s in str:gmatch("([^/]+)") do - s = s:gsub("%[%[(%d+)%]%]",function(n) return tmp[tonumber(n)] end) - result[#result+1] = s - end - cache[key] = result - return result - end - end - - actions = { + local actions = { [10] = "stay", [11] = "parent", [12] = "subtree root", 
@@ -2381,112 +2153,168 @@ do [21] = "match one of", [22] = "match and attribute eq", [23] = "match and attribute ne", - [23] = "match and attribute present", + [24] = "match one of and attribute eq", + [25] = "match one of and attribute ne", + [27] = "has attribute", + [28] = "has value", + [29] = "fast match", [30] = "select", [40] = "processing instruction", } - function compose(result) - if not result or #result == 0 then + local map = { } + + local space = lpeg.S(' \r\n\t') + local squote = lpeg.S("'") + local dquote = lpeg.S('"') + local lparent = lpeg.P('(') + local rparent = lpeg.P(')') + local atsign = lpeg.P('@') + local lbracket = lpeg.P('[') + local rbracket = lpeg.P(']') + local exclam = lpeg.P('!') + local period = lpeg.P('.') + local eq = lpeg.P('==') + lpeg.P('=') + local ne = lpeg.P('<>') + lpeg.P('!=') + local star = lpeg.P('*') + local slash = lpeg.P('/') + local colon = lpeg.P(':') + local bar = lpeg.P('|') + local hat = lpeg.P('^') + local valid = lpeg.R('az', 'AZ', '09') + lpeg.S('_-') + local name_yes = lpeg.C(valid^1) * colon * lpeg.C(valid^1) + local name_nop = lpeg.C(lpeg.P(true)) * lpeg.C(valid^1) + local name = name_yes + name_nop + local number = lpeg.C((lpeg.S('+-')^0 * lpeg.R('09')^1)) / tonumber + local names = (bar^0 * name)^1 + local morenames = name * (bar^0 * name)^1 + local instructiontag = lpeg.P('pi::') + local spacing = lpeg.C(space^0) + local somespace = space^1 + local optionalspace = space^0 + local text = lpeg.C(valid^0) + local value = (squote * lpeg.C((1 - squote)^0) * squote) + (dquote * lpeg.C((1 - dquote)^0) * dquote) + local empty = 1-slash + + local is_eq = lbracket * atsign * name * eq * value * rbracket + local is_ne = lbracket * atsign * name * ne * value * rbracket + local is_attribute = lbracket * atsign * name * rbracket + local is_value = lbracket * value * rbracket + local is_number = lbracket * number * rbracket + + local is_one = name + local is_none = exclam * name + local is_one_of = ((lparent * names * 
rparent) + morenames) + local is_none_of = exclam * ((lparent * names * rparent) + morenames) + + local stay = (period ) + local parent = (period * period ) / function( ) map[#map+1] = { 11 } end + local subtreeroot = (slash + hat ) / function( ) map[#map+1] = { 12 } end + local documentroot = (hat * hat ) / function( ) map[#map+1] = { 13 } end + local any = (star ) / function( ) map[#map+1] = { 14 } end + local many = (star * star ) / function( ) map[#map+1] = { 15 } end + local initial = (hat * hat * hat ) / function( ) map[#map+1] = { 16 } end + + local match = (is_one ) / function(...) map[#map+1] = { 20, true , ... } end + local match_one_of = (is_one_of ) / function(...) map[#map+1] = { 21, true , ... } end + local dont_match = (is_none ) / function(...) map[#map+1] = { 20, false, ... } end + local dont_match_one_of = (is_none_of ) / function(...) map[#map+1] = { 21, false, ... } end + + local match_and_eq = (is_one * is_eq ) / function(...) map[#map+1] = { 22, true , ... } end + local match_and_ne = (is_one * is_ne ) / function(...) map[#map+1] = { 23, true , ... } end + local dont_match_and_eq = (is_none * is_eq ) / function(...) map[#map+1] = { 22, false, ... } end + local dont_match_and_ne = (is_none * is_ne ) / function(...) map[#map+1] = { 23, false, ... } end + + local match_one_of_and_eq = (is_one_of * is_eq ) / function(...) map[#map+1] = { 24, true , ... } end + local match_one_of_and_ne = (is_one_of * is_ne ) / function(...) map[#map+1] = { 25, true , ... } end + local dont_match_one_of_and_eq = (is_none_of * is_eq ) / function(...) map[#map+1] = { 24, false, ... } end + local dont_match_one_of_and_ne = (is_none_of * is_ne ) / function(...) map[#map+1] = { 25, false, ... } end + + local has_attribute = (is_one * is_attribute) / function(...) map[#map+1] = { 27, true , ... } end + local has_value = (is_one * is_value ) / function(...) map[#map+1] = { 28, true , ... } end + local dont_has_attribute = (is_none * is_attribute) / function(...) 
map[#map+1] = { 27, false, ... } end + local dont_has_value = (is_none * is_value ) / function(...) map[#map+1] = { 28, false, ... } end + local position = (is_one * is_number ) / function(...) map[#map+1] = { 30, true, ... } end + local dont_position = (is_none * is_number ) / function(...) map[#map+1] = { 30, false, ... } end + + local instruction = (instructiontag * text ) / function(...) map[#map+1] = { 40, ... } end + local nothing = (empty ) / function( ) map[#map+1] = { 15 } end -- 15 ? + local crap = (1-slash)^1 + + -- a few ugly goodies: + + local docroottag = lpeg.P('^^') / function( ) map[#map+1] = { 12 } end + local subroottag = lpeg.P('^') / function( ) map[#map+1] = { 13 } end + local roottag = lpeg.P('root::') / function( ) map[#map+1] = { 12 } end + local parenttag = lpeg.P('parent::') / function( ) map[#map+1] = { 11 } end + local childtag = lpeg.P('child::') + local selftag = lpeg.P('self::') + + -- there will be more and order will be optimized + + local selector = ( + instruction + + many + any + + parent + stay + + dont_position + position + + dont_match_one_of_and_eq + dont_match_one_of_and_ne + + match_one_of_and_eq + match_one_of_and_ne + + dont_match_and_eq + dont_match_and_ne + + match_and_eq + match_and_ne + + has_attribute + has_value + + dont_match_one_of + match_one_of + + dont_match + match + + crap + empty + ) + + local grammar = lpeg.P { "startup", + startup = (initial + documentroot + subtreeroot + roottag + docroottag + subroottag)^0 * lpeg.V("followup"), + followup = ((slash + parenttag + childtag + selftag)^0 * selector)^1, + } + + function compose(str) + if not str or str == "" then -- wildcard return true - elseif #result == 1 then - local r = result[1][1] - if r == "14" or r == "15" then - -- wildcard + elseif str == '/' then + -- root + return false + else + map = { } + grammar:match(str) + if #map == 0 then return true - elseif r == "12" then - -- root - return false - end - end - local map = { } - for r=1,#result do - 
local ri = result[r] - if ri == "." then - -- skip - elseif ri == ".." then - map[#map+1] = { 11 } - elseif ri == "^" then - map[#map+1] = { 12 } - elseif ri == "^^" then - map[#map+1] = { 13 } - elseif ri == "*" then - map[#map+1] = { 14 } - elseif ri == "**" then - map[#map+1] = { 15 } else - local m = ri:match("^%((.*)%)$") -- (a|b|c) - if m or ri:find('|') then - m = m or ri - if m:find("[%[%]%(%)%/]") then -- []()/ - -- error - else - local t = { 21 } - for s in m:gmatch("([^|])") do - local ns, tg = s:match("^(.-):?([^:]+)$") - t[#t+1] = ns - t[#t+1] = tg - end - map[#map+1] = t - end - else - local s, f = ri:match("^(.-)%[%s*(.+)%s*%]$") --aaa[bbb] - if s and f then - local ns, tg = s:match("^(.-):?([^:]+)$") - local at, op, vl = f:match("^@(.-)([!=<>]?)([^!=<>]+)$") -- [@a=='b'] - if op and op ~= "" then - if op == '=' or op == '==' then - map[#map+1] = { 22, ns, tg, at, (vl:gsub("^([\'\"])(.*)%1$", "%2")) } - elseif op == '<>' or op == '!=' then - map[#map+1] = { 23, ns, tg, at, (vl:gsub("^([\'\"])(.*)%1$", "%2")) } - else - -- error - end - elseif f:find("^([%-%+%d]+)$")then - map[#map+1] = { 30, ns, tg, tonumber(f) } - elseif vl ~= "" then - map[#map+1] = { 24, ns, tg, vl } - end - else - local pi = ri:match("^pi::(.-)$") - if pi then - map[#map+1] = { 40, pi } - else - map[#map+1] = { 20, ri:match("^(.-):?([^:]+)$") } - end + local m = map[1][1] + if #map == 1 then + if m == 14 or m == 15 then + -- wildcard + return true + elseif m == 12 then + -- root + return false end + elseif #map == 2 and m == 12 and map[2][1] == 20 then + return { { 29, map[2][2], map[2][3] } } end + if m ~= 11 and m ~= 12 and m ~= 13 and m ~= 14 and m ~= 15 and m ~= 16 then + table.insert(map, 1, { 16 }) + end + return map end end - -- if we have a symbol, we can prepend that to the string, which is faster - local mm = map[1] or { } - local r = mm[1] or 0 - if #map == 1 then - if r == 14 or r == 15 then - -- wildcard - return true - elseif r == 12 then - -- root - return false - 
end - end - if r ~= 11 and r ~= 12 and r ~= 13 and r ~= 14 and r ~= 15 then - table.insert(map, 1, { 16 }) - end - return map end - cache = { } + local cache = { } - function xml.lpath(pattern) + function xml.lpath(pattern,trace) if type(pattern) == "string" then local result = cache[pattern] if not result then - result = compose(analyze(pattern)) + result = compose(pattern) cache[pattern] = result end - if xml.trace_lpath then + if trace or xml.trace_lpath then xml.lshow(result) end return result @@ -2495,23 +2323,58 @@ do end end - function xml.lshow(pattern) + local fallbackreport = (texio and texio.write) or io.write + + function xml.lshow(pattern,report) + report = report or fallbackreport local lp = xml.lpath(pattern) if lp == false then - print("root") + report(" -: root\n") elseif lp == true then - print("wildcard") + report(" -: wildcard\n") else - if type(pattern) ~= "table" then - print("pattern: " .. tostring(pattern)) + if type(pattern) == "string" then + report(string.format("pattern: %s\n",pattern)) end for k,v in ipairs(lp) do - print(k,actions[v[1]],table.join(v," ",2)) + if #v > 1 then + local t = { } + for i=2,#v do + local vv = v[i] + if type(vv) == "string" then + t[#t+1] = (vv ~= "" and vv) or "#" + elseif type(vv) == "boolean" then + t[#t+1] = (vv and "==") or "<>" + end + end + report(string.format("%2i: %s %s -> %s\n", k,v[1],actions[v[1]],table.join(t," "))) + else + report(string.format("%2i: %s %s\n", k,v[1],actions[v[1]])) + end end end end - function xml.traverse(root,pattern,handle,reverse,index,wildcard) +end + +--[[ldx-- +

An lpath is converted to a table with instructions for traversing the +tree. However, simple cases are signaled by booleans. Because we don't know in +advance what we want to do with the found element, the handle gets three arguments:

+ + +r : the root element of the data table +d : the data table of the result +t : the index in the data table of the result + + +

Access to the root and data table makes it possible to construct insert and delete +functions.

+--ldx]]-- + +do + + function xml.traverse(root,pattern,handle,reverse,index,parent,wildcard) if not root then -- error return false elseif pattern == false then -- root @@ -2531,103 +2394,172 @@ do end end return false - elseif root and root.dt then + elseif root.dt then index = index or 1 local action = pattern[index] local command = action[1] - if (command == 16 or command == 12) and index == 1 then -- initial - wildcard = true - index = index + 1 - action = pattern[index] - command = action[1] - end - local traverse = xml.traverse - local rootdt = root.dt - local start, stop, step, n, dn = 1, #rootdt, 1, 0, 1 - if command == 30 then - if action[4] < 0 then - start, stop, step = stop, start, -1 - dn = -1 + if command == 29 then -- fast case /oeps + local rootdt = root.dt + for k=1,#rootdt do + local e = rootdt[k] + local ns, tg = e.rn or e.ns, e.tg + if ns == action[2] and tg == action[3] then + if handle(root,rootdt,k) then return false end + end end - elseif reverse and index == #pattern then - start, stop, step = stop, start, -1 - end - for k=start,stop,step do - local e = rootdt[k] - local ns, tg = e.ns, e.tg - if tg then + elseif command == 11 then -- parent + local ep = root.__p__ or parent + if index < #pattern then + if not xml.traverse(ep,pattern,handle,reverse,index+1,root) then return false end + elseif handle(root,rootdt,k) then + return false + end + else + if (command == 16 or command == 12) and index == 1 then -- initial + wildcard = true + index = index + 1 + action = pattern[index] + command = action and action[1] or 0 -- something is wrong + end + if command == 11 then -- parent + local ep = root.__p__ or parent + if index < #pattern then + if not xml.traverse(ep,pattern,handle,reverse,index+1,root) then return false end + elseif handle(root,rootdt,k) then + return false + end + else + local traverse = xml.traverse + local rootdt = root.dt + local start, stop, step, n, dn = 1, #rootdt, 1, 0, 1 if command == 30 then - if ns == action[2] and tg 
== action[3] then - n = n + dn - if n == action[4] then - if index == #pattern then - if handle(root,rootdt,root.ri or k) then return false end - else - if not traverse(e,pattern,handle,reverse,index+1) then return false end - end - break - end - elseif wildcard then - if not traverse(e,pattern,handle,reverse,index,true) then return false end + if action[5] < 0 then + start, stop, step = stop, start, -1 + dn = -1 end - else - local matched = false - if command == 20 then -- match - matched = ns == action[2] and tg == action[3] - elseif command == 21 then -- match one of - for i=2,#action,2 do - if ns == action[i] and tg == action[i+1] then - matched = true - break + elseif reverse and index == #pattern then + start, stop, step = stop, start, -1 + end + for k=start,stop,step do + local e = rootdt[k] + local ns, tg = e.rn or e.ns, e.tg + if tg then + if command == 30 then + local matched = ns == action[3] and tg == action[4] + if action[2] then matched = not matched end + if matched then + n = n + dn + if n == action[5] then + if index == #pattern then + if handle(root,rootdt,root.ri or k) then return false end + else + if not traverse(e,pattern,handle,reverse,index+1,root) then return false end + end + break + end + elseif wildcard then + if not traverse(e,pattern,handle,reverse,index,root,true) then return false end end - end - elseif command == 22 then -- eq - matched = ns == action[2] and tg == action[3] and e.at[action[4]] == action[5] - elseif command == 23 then -- ne - matched = ns == action[2] and tg == action[3] and e.at[action[4]] ~= action[5] - elseif command == 24 then -- present - matched = ns == action[2] and tg == action[3] and e.at[action[4]] - end - if matched then -- combine tg test and at test - if index == #pattern then - if handle(root,rootdt,root.ri or k) then return false end else - if not traverse(e,pattern,handle,reverse,index+1) then return false end - end - elseif command == 14 then -- any - if index == #pattern then - if 
handle(root,rootdt,root.ri or k) then return false end - else - if not traverse(e,pattern,handle,reverse,index+1) then return false end - end - elseif command == 15 then -- many - if index == #pattern then - if handle(root,rootdt,root.ri or k) then return false end - else - if not traverse(e,pattern,handle,reverse,index+1,true) then return false end - end - elseif command == 11 then -- parent - local ep = e.__p__ - if index < #pattern then - if not traverse(ep,pattern,handle,reverse,index+1) then return false end - elseif handle(root,rootdt,k) then - return false - end - break - elseif command == 40 and tg == "@pi@" then -- pi - local pi = action[2] - if pi ~= "" then - local pt = e.dt[1] - if pt and pt:find(pi) then - if handle(root,rootdt,k) then + local matched, multiple = false, false + if command == 20 then -- match + matched = ns == action[2] and tg == action[3] + if action[2] then matched = not matched end + elseif command == 21 then -- match one of + multiple = true + for i=2,#action,2 do + if ns == action[i] and tg == action[i+1] then matched = true break end + end + if action[2] then matched = not matched end + elseif command == 22 then -- eq + matched = ns == action[3] and tg == action[4] + if action[2] then matched = not matched end + matched = matched and e.at[action[6]] == action[7] + elseif command == 23 then -- ne + matched = ns == action[3] and tg == action[4] + if action[2] then matched = not matched end + matched = mached and e.at[action[6]] ~= action[7] + elseif command == 24 then -- one of eq + multiple = true + for i=3,#action-2,2 do + if ns == action[i] and tg == action[i+1] then matched = true break end + end + if action[2] then matched = not matched end + matched = matched and e.at[action[#action-1]] == action[#action] + elseif command == 25 then -- one of ne + multiple = true + for i=3,#action-2,2 do + if ns == action[i] and tg == action[i+1] then matched = true break end + end + if action[2] then matched = not matched end + matched = 
matched and e.at[action[#action-1]] ~= action[#action] + elseif command == 27 then -- has attribute + local ans = action[3] + matched = ns == action[3] and tg == action[4] + if action[2] then matched = not matched end + matched = matched and e.at[action[5]] + elseif command == 28 then -- has value + local edt = e.dt + matched = ns == action[3] and tg == action[4] + if action[2] then matched = not matched end + matched = matched and edt and edt[1] == action[5] + end + if matched then -- combine tg test and at test + if index == #pattern then + if handle(root,rootdt,root.ri or k) then return false end + if wildcard and multiple then + if not traverse(e,pattern,handle,reverse,index,root,true) then return false end + end + else + if not traverse(e,pattern,handle,reverse,index+1,root) then return false end + end + elseif command == 14 then -- any + if index == #pattern then + if handle(root,rootdt,root.ri or k) then return false end + else + if not traverse(e,pattern,handle,reverse,index+1,root) then return false end + end + elseif command == 15 then -- many + if index == #pattern then + if handle(root,rootdt,root.ri or k) then return false end + else + if not traverse(e,pattern,handle,reverse,index+1,root,true) then return false end + end + -- not here : 11 + elseif command == 11 then -- parent + local ep = e.__p__ or parent + if index < #pattern then + if not traverse(ep,pattern,handle,reverse,root,index+1) then return false end + elseif handle(root,rootdt,k) then return false end + elseif command == 40 and e.special and tg == "@pi@" then -- pi + local pi = action[2] + if pi ~= "" then + local pt = e.dt[1] + if pt and pt:find(pi) then + if handle(root,rootdt,k) then + return false + end + end + elseif handle(root,rootdt,k) then + return false + end + elseif wildcard then + if not traverse(e,pattern,handle,reverse,index,root,true) then return false end end - elseif handle(root,rootdt,k) then - return false end - elseif wildcard then - if not 
traverse(e,pattern,handle,reverse,index,true) then return false end + else + -- not here : 11 + if command == 11 then -- parent + local ep = e.__p__ or parent + if index < #pattern then + if not traverse(ep,pattern,handle,reverse,index+1,root) then return false end + elseif handle(root,rootdt,k) then + return false + end + break -- else loop + end end end end @@ -2636,15 +2568,71 @@ do return true end +end + +--[[ldx-- +

Next come all kinds of locators and manipulators. The most generic function here +is xml.filter(root,pattern). All registered functions in the filters namespace +can be part of a search path, as in:

+ + +local r, d, k = xml.filter(root,"/a/b/c/position(4)" + +--ldx]]-- + +do + local traverse, lpath, convert = xml.traverse, xml.lpath, xml.convert xml.filters = { } + --[[ldx-- +

For splitting the filter function from the path specification, we can + use string matching or lpeg matching. Here the difference in speed is + negligible but the lpeg variant is more robust.

+ --ldx]]-- + + -- function xml.filter(root,pattern) + -- local pat, fun, arg = pattern:match("^(.+)/(.-)%((.*)%)$") + -- if fun then + -- return (xml.filters[fun] or xml.filters.default)(root,pat,arg) + -- else + -- pat, arg = pattern:match("^(.+)/@(.-)$") + -- if arg then + -- return xml.filters.attributes(root,pat,arg) + -- else + -- return xml.filters.default(root,pattern) + -- end + -- end + -- end + + -- not faster but hipper ... although ... i can't get rid of the trailing / in the path + + local name = (lpeg.R("az","AZ")+lpeg.R("_-"))^1 + local path = lpeg.C(((1-lpeg.P('/'))^0 * lpeg.P('/'))^1) + local argument = lpeg.P { "(" * lpeg.C(((1 - lpeg.S("()")) + lpeg.V(1))^0) * ")" } + local action = lpeg.Cc(1) * path * lpeg.C(name) * argument + local attribute = lpeg.Cc(2) * path * lpeg.P('@') * lpeg.C(name) + + local parser = action + attribute + + function xml.filter(root,pattern) + local kind, a, b, c = parser:match(pattern) + if kind == 1 then + return (xml.filters[b] or xml.filters.default)(root,a,c) + elseif kind == 2 then + return xml.filters.attributes(root,a,b) + else + return xml.filters.default(root,pattern) + end + end + function xml.filters.default(root,pattern) local rt, dt, dk traverse(root, lpath(pattern), function(r,d,k) rt,dt,dk = r,d,k return true end) return dt and dt[dk], rt, dt, dk end + function xml.filters.reverse(root,pattern) local rt, dt, dk traverse(root, lpath(pattern), function(r,d,k) rt,dt,dk = r,d,k return true end, 'reverse') @@ -2698,17 +2686,14 @@ do traverse(root, lpath(pattern), function(r,d,k) rt, dt, dk, i = r, d, k, i-1 return i == 0 end, reverse) if i == 0 then return dt and dt[dk], rt, dt, dk - else - return nil, nil, nil, nil end - else - return nil, nil, nil, nil end + return nil, nil, nil, nil end function xml.filters.attributes(root,pattern,arguments) local rt, dt, dk traverse(root, lpath(pattern), function(r,d,k) rt, dt, dk = r, d, k return true end) - local ekat = dt and dt[dk] and dt[dk].at + local ekat = (dt and 
dt[dk] and dt[dk].at) or (rt and rt.at) if ekat then if arguments then return ekat[arguments] or "", rt, dt, dk @@ -2722,69 +2707,33 @@ do function xml.filters.attribute(root,pattern,arguments) local rt, dt, dk traverse(root, lpath(pattern), function(r,d,k) rt, dt, dk = r, d, k return true end) - local ekat = dt and dt[dk] and dt[dk].at + local ekat = (dt and dt[dk] and dt[dk].at) or (rt and rt.at) return (ekat and ekat[arguments]) or "" end function xml.filters.text(root,pattern,arguments) - local ek, dt, dk, rt = xml.filters.index(root,pattern,arguments) - return (ek and ek.dt) or "", rt, dt, dk - end - - function xml.filter(root,pattern) - local pat, fun, arg = pattern:match("^(.+)/(.-)%((.*)%)$") - if fun then - return (xml.filters[fun] or xml.filters.default)(root,pat,arg) - else - pat, arg = pattern:match("^(.+)/@(.-)$") - if arg then - return xml.filters.attributes(root,pat,arg) + local dtk, rt, dt, dk = xml.filters.index(root,pattern,arguments) + if dtk then + local dtkdt = dtk.dt + if #dtkdt == 1 and type(dtkdt[1]) == "string" then + return dtkdt[1], rt, dt, dk else - return xml.filters.default(root,pattern) + return xml.tostring(dtkdt), rt, dt, dk end + else + return "", rt, dt, dk end end - xml.filters.position = xml.filters.index - - -- these may go away - - xml.index_element = xml.filters.index - xml.count_elements = xml.filters.count - xml.first_element = xml.filters.first - xml.last_element = xml.filters.last - xml.index_text = xml.filters.text - xml.first_text = function (root,pattern) return xml.filters.text(root,pattern, 1) end - xml.last_text = function (root,pattern) return xml.filters.text(root,pattern,-1) end - - -- so far - - function xml.get_text(root,pattern,reverse) - local rt, dt, dk - traverse(root, lpath(pattern), function(r,d,k) rt, dt, dk = r, d, k return true end, reverse) - local ek = dt and dt[dk] - return (ek and ek.dt) or "", rt, dt, dk - end - - function xml.each_element(root, pattern, handle, reverse) - local ok - 
traverse(root, lpath(pattern), function(r,d,k) ok = true handle(r,d,k) end, reverse) - return ok - end - - function xml.get_element(root,pattern,reverse) - local rt, dt, dk - traverse(root, lpath(pattern), function(r,d,k) rt, dt, dk = r, d, k return true end, reverse) - return dt and dt[dk], rt, dt, dk - end - - -- these may change + --[[ldx-- +

The following functions collect elements and texts.

+ --ldx]]-- - function xml.all_elements(root, pattern, ignorespaces) -- ok? + function xml.collect_elements(root, pattern, ignorespaces) local rr, dd = { }, { } traverse(root, lpath(pattern), function(r,d,k) local dk = d and d[k] if dk then - if ignorespaces and type(dk) == "string" and dk:find("^[\s\n]*$") then + if ignorespaces and type(dk) == "string" and dk:find("^%s*$") then -- ignore else local n = #rr+1 @@ -2795,8 +2744,8 @@ do return dd, rr end - function xml.all_texts(root, pattern, flatten) -- crap - local t, r = { }, { } + function xml.collect_texts(root, pattern, flatten) + local t = { } -- no r collector traverse(root, lpath(pattern), function(r,d,k) if d then local ek = d[k] @@ -2813,11 +2762,77 @@ do else t[#t+1] = "" end - r[#r+1] = r end) - return t, r + return t + end + + --[[ldx-- +

Often using an iterator looks nicer in the code than passing handler + functions. The Lua book describes how to use coroutines for that + purpose (http://www.lua.org/pil/9.3.html). This permits + code like:

+ + + for r, d, k in xml.elements(xml.load('text.xml'),"title") do + print(d[k]) + end + + +

Which will print all the titles in the document. The iterator variant takes + 1.5 times the runtime of the function variant, which is due to the overhead in + creating the wrapper. So, instead of:

+ + + function xml.filters.first(root,pattern) + for rt,dt,dk in xml.elements(root,pattern) + return dt and dt[dk], rt, dt, dk + end + return nil, nil, nil, nil + end + + +

We use the function variants in the filters.

+ --ldx]]-- + + function xml.elements(root,pattern,reverse) + return coroutine.wrap(function() traverse(root, lpath(pattern), coroutine.yield, reverse) end) + end + + function xml.each_element(root, pattern, handle, reverse) + local ok + traverse(root, lpath(pattern), function(r,d,k) ok = true handle(r,d,k) end, reverse) + return ok + end + + function xml.process_elements(root, pattern, handle) + traverse(root, lpath(pattern), function(r,d,k) + local dkdt = d[k].dt + if dkdt then + for i=1,#dkdt do + local v = dkdt[i] + if v.tg then handle(v) end + end + end + end) + end + + function xml.process_attributes(root, pattern, handle) + traverse(root, lpath(pattern), function(r,d,k) + local ek = d[k] + local a = ek.at or { } + handle(a) + if next(a) then + ek.at = a + else + ek.at = nil + end + end) end + --[[ldx-- +

We've now arrived at the functions that manipulate the tree.

+ --ldx]]-- + function xml.inject_element(root, pattern, element, prepend) if root and element then local matches, collect = { }, nil @@ -2868,7 +2883,7 @@ do function xml.insert_element(root, pattern, element, before) -- todo: element als functie if root and element then if pattern == "/" then - xml.inject_element(root, pattern, element, before) -- todo: element als functie + xml.inject_element(root, pattern, element, before) else local matches, collect = { }, nil if type(element) == "string" then @@ -2898,8 +2913,6 @@ do end end - -- first, last, each - xml.insert_element_after = xml.insert_element xml.insert_element_before = function(r,p,e) xml.insert_element(r,p,e,true) end xml.inject_element_after = xml.inject_element @@ -2930,24 +2943,47 @@ do end end - function xml.process(root, pattern, handle) - traverse(root, lpath(pattern), function(r,d,k) - if d[k].dt then - for k,v in ipairs(d[k].dt) do - if v.tg then handle(v) end + function xml.include(xmldata,element,attribute,pathlist,collapse) + element = element or 'ctx:include' + attribute = attribute or 'name' + pathlist = pathlist or { '.' } + -- todo, check op ri + local function include(r,d,k) + local ek = d[k] + local name = (ek.at and ek.at[attribute]) or "" + if name ~= "" then + -- maybe file lookup in tree + local fullname + for _, path in ipairs(pathlist) do + if path == '.' 
then + fullname = name + else + fullname = file.join(path,name) + end + local f = io.open(fullname) + if f then + xml.assign(d,k,xml.load(f,collapse)) + f:close() + break + else + xml.empty(d,k) + end end + else + xml.empty(d,k) end - end) + end + while xml.each_element(xmldata, element, include) do end end - function xml.strip(root, pattern) + function xml.strip_whitespace(root, pattern) traverse(root, lpath(pattern), function(r,d,k) local dkdt = d[k].dt - if dkdt then + if dkdt then -- can be optimized local t = { } for i=1,#dkdt do local str = dkdt[i] - if type(str) == "string" and str:find("^[\032\010\012\013]*$") then + if type(str) == "string" and str:find("^[ \n\r\t]*$") then -- stripped else t[#t+1] = str @@ -2958,8 +2994,6 @@ do end) end - -- - function xml.rename_space(root, oldspace, newspace) -- fast variant local ndt = #root.dt local rename = xml.rename_space @@ -2968,6 +3002,9 @@ do if type(e) == "table" then if e.ns == oldspace then e.ns = newspace + if e.rn then + e.rn = newspace + end end local edt = e.dt if edt then @@ -2987,83 +3024,30 @@ do d[k].ns = newns end) end - - -- function xml.process_attributes(root, pattern, handle) - -- traverse(root, lpath(pattern), function(e,k) handle(e[k].at) end) - -- end - - function xml.process_attributes(root, pattern, handle) + function xml.check_namespace(root, pattern, newns) traverse(root, lpath(pattern), function(r,d,k) - local ek = d[k] - local a = ek.at or { } - handle(a) - if next(a) then - ek.at = a - else - ek.at = nil + local dk = d[k] + if (not dk.rn or dk.rn == "") and dk.ns == "" then + dk.rn = newns end end) end - - function xml.package(tag,attributes,data) - local n, t = tag:match("^(.-):(.+)$") - if attributes then - return { ns = n or "", tg = t or tag, dt = data or "", at = attributes } - else - return { ns = n or "", tg = t or tag, dt = data or "" } - end - end - - -- some special functions, handy for the manual: - - function xml.gsub(t,old,new) - if t.dt then - for k,v in ipairs(t.dt) do - 
if type(v) == "string" then - t.dt[k] = v:gsub(old,new) - else - xml.gsub(v,old,new) - end - end - end - end - - function xml.strip_leading_spaces(ek, e, k) -- cosmetic, for manual - if e and k and e[k-1] and type(e[k-1]) == "string" then - local s = e[k-1]:match("\n(%s+)") - xml.gsub(ek,"\n"..string.rep(" ",#s),"\n") - end - end - - function xml.serialize_path(root,lpath,handle) - local ek, e, k = xml.first_element(root,lpath) - ek = xml.copy(ek) - xml.strip_leading_spaces(ek,e,k) - xml.serialize(ek,handle) - end - - -- http://www.lua.org/pil/9.3.html (or of course the book) - -- - -- it's nice to have an iterator but it comes with some extra overhead - -- - -- for r, d, k in xml.elements(xml.load('text.xml'),"title") do print(d[k]) end - - function xml.elements(root,pattern,reverse) - return coroutine.wrap(function() traverse(root, lpath(pattern), coroutine.yield, reverse) end) + function xml.remap_name(root, pattern, newtg, newns, newrn) + traverse(root, lpath(pattern), function(r,d,k) + local dk = d[k] + dk.tg = newtg + dk.ns = newns + dk.rn = newrn + end) end - -- the iterator variant needs 1.5 times the runtime of the function variant - -- - -- function xml.filters.first(root,pattern) - -- for rt,dt,dk in xml.elements(root,pattern) - -- return dt and dt[dk], rt, dt, dk - -- end - -- return nil, nil, nil, nil - -- end +end - -- todo xml.gmatch for text +--[[ldx-- +

Here are a few synonyms.

+--ldx]]-- -end +xml.filters.position = xml.filters.index xml.count = xml.filters.count xml.index = xml.filters.index @@ -3072,7 +3056,10 @@ xml.first = xml.filters.first xml.last = xml.filters.last xml.each = xml.each_element -xml.all = xml.all_elements +xml.process = xml.process_element +xml.strip = xml.strip_whitespace +xml.collect = xml.collect_elements +xml.all = xml.collect_elements xml.insert = xml.insert_element_after xml.inject = xml.inject_element_after @@ -3081,39 +3068,38 @@ xml.before = xml.insert_element_before xml.delete = xml.delete_element xml.replace = xml.replace_element --- a few helpers, the may move to lxml modules +--[[ldx-- +

The following helper functions best belong to the lxml-ini +module. Some are here because we need them in the mk +document and other manuals, others came up when playing with +this module. Since this module is also used in mtxrun we've +put them here instead of loading more modules there than needed.

+--ldx]]-- -function xml.include(xmldata,element,attribute,pathlist,collapse) - element = element or 'ctx:include' - attribute = attribute or 'name' - pathlist = pathlist or { '.' } - -- todo, check op ri - local function include(r,d,k) - local ek = d[k] - local name = (ek.at and ek.at[attribute]) or "" - if name ~= "" then - -- maybe file lookup in tree - local fullname - for _, path in ipairs(pathlist) do - if path == '.' then - fullname = name - else - fullname = file.join(path,name) - end - local f = io.open(fullname) - if f then - xml.assign(d,k,xml.load(f,collapse)) - f:close() - break - else - xml.empty(d,k) - end +function xml.gsub(t,old,new) + if t.dt then + for k,v in ipairs(t.dt) do + if type(v) == "string" then + t.dt[k] = v:gsub(old,new) + else + xml.gsub(v,old,new) end - else - xml.empty(d,k) end end - while xml.each(xmldata, element, include) do end +end + +function xml.strip_leading_spaces(dk,d,k) -- cosmetic, for manual + if d and k and d[k-1] and type(d[k-1]) == "string" then + local s = d[k-1]:match("\n(%s+)") + xml.gsub(dk,"\n"..string.rep(" ",#s),"\n") + end +end + +function xml.serialize_path(root,lpath,handle) + local dk, r, d, k = xml.first(root,lpath) + dk = xml.copy(dk) + xml.strip_leading_spaces(dk,d,k) + xml.serialize(dk,handle) end xml.escapes = { ['&'] = '&', ['<'] = '<', ['>'] = '>', ['"'] = '"' } @@ -3124,22 +3110,37 @@ function xml.unescaped(str) return str:gsub("(&.-;)", xml.unescapes) end function xml.cleansed (str) return str:gsub("<.->" , '' ) end -- "%b<>" function xml.join(t,separator,lastseparator) - local result = { } - for k,v in pairs(t) do - result[k] = xml.tostring(v) - end - if lastseparator then - return table.join(result,separator,1,#result-1) .. lastseparator .. result[#result] + if #t > 0 then + local result = { } + for k,v in pairs(t) do + result[k] = xml.tostring(v) + end + if lastseparator then + return table.join(result,separator or "",1,#result-1) .. (lastseparator or "") .. 
result[#result] + else + return table.join(result,separator) + end else - return table.join(result,separator) + return "" end end -do if utf then +--[[ldx-- +

We provide (at least here) two entity handlers. The more extensive +resolver consults a hash first, tries to convert to UTF next, +and finally calls a handler when defined. When this all fails, the +original entity is returned.

+--ldx]]-- + +do if unicode and unicode.utf8 then + + xml.entities = xml.entities or { } -- xml.entities.handler == function + + local char = unicode.utf8.char local function toutf(s) - return utf.char(tonumber(s,16)) + return char(tonumber(s,16)) end function xml.utfize(root) @@ -3147,25 +3148,50 @@ do if utf then for k=1,#d do local dk = d[k] if type(dk) == "string" then - d[k] = dk:gsub("&#x(.-);",toutf) + -- test prevents copying if no match + if dk:find("&#x.-;") then + d[k] = dk:gsub("&#x(.-);",toutf) + end else xml.utfize(dk) end end end -else - function xml.utfize() - print("entity to utf conversion is not available") + + local entities = xml.entities + + local function resolve(e) + local e = entities[e] + if e then + return e + elseif e:find("#x") then + return char(tonumber(s:sub(3),16)) + else + local h = entities.handler + return (h and h(e)) or "&" .. e .. ";" + end end -end end + function xml.resolve_entities(root) + local d = root.dt + for k=1,#d do + local dk = d[k] + if type(dk) == "string" then + if dk:find("&.-;") then + d[k] = dk:gsub("&(.-);",resolve) + end + else + xml.utfize(dk) + end + end + end ---- examples +end end ---~ for _, e in ipairs(xml.filters.elements(ctxrunner.xmldata,"ctx:message")) do ---~ print(">>>",xml.tostring(e.dt)) ---~ end +--~ xml.lshow("/../../../a/(b|c)[@d='e']/f") +--~ xml.lshow("/../../../a/!(b|c)[@d='e']/f") +--~ xml.lshow("/../../../a/!b[@d!='e']/f") -- filename : l-utils.lua diff --git a/scripts/context/ruby/base/pdf.rb b/scripts/context/ruby/base/pdf.rb index 9f4e9a6c3..5aec06fc5 100644 --- a/scripts/context/ruby/base/pdf.rb +++ b/scripts/context/ruby/base/pdf.rb @@ -7,7 +7,7 @@ module PDFview @method = 'default' # 'xpdf' - @opencalls['default'] = "pdfopen --file" + @opencalls['default'] = "pdfopen --file" # "pdfopen --back --file" @opencalls['xpdf'] = "xpdfopen" @closecalls['default'] = "pdfclose --file" diff --git a/scripts/context/ruby/base/tex.rb b/scripts/context/ruby/base/tex.rb index ceb9473c0..73b382af9 
100644 --- a/scripts/context/ruby/base/tex.rb +++ b/scripts/context/ruby/base/tex.rb @@ -72,6 +72,7 @@ class TEX @@backends = Hash.new @@mappaths = Hash.new @@runoptions = Hash.new + @@tcxflag = Hash.new @@draftoptions = Hash.new @@texformats = Hash.new @@mpsformats = Hash.new @@ -169,13 +170,19 @@ class TEX ['cont-en','cont-nl','cont-de','cont-it', 'cont-fr','cont-cz','cont-ro','cont-uk'] .each do |f| @@texprocstr[f] = "\\emergencyend" end - # @@runoptions['xetex'] = ['--8bit','-output-driver="xdvipdfmx -E -d 4 -V 5 -q"'] - @@runoptions['xetex'] = ['--8bit','-output-driver="xdvipdfmx -E -d 4 -V 5"'] - @@runoptions['pdfetex'] = ['--8bit'] # obsolete - @@runoptions['pdftex'] = ['--8bit'] # pdftex is now pdfetex - @@runoptions['luatex'] = ['--file-line-error'] @@runoptions['aleph'] = ['--8bit'] + @@runoptions['luatex'] = ['--file-line-error'] @@runoptions['mpost'] = ['--8bit'] + @@runoptions['pdfetex'] = ['--8bit'] # obsolete + @@runoptions['pdftex'] = ['--8bit'] # pdftex is now pdfetex + @@runoptions['xetex'] = ['--8bit','-output-driver="xdvipdfmx -E -d 4 -V 5"'] + + @@tcxflag['aleph'] = true + @@tcxflag['luatex'] = false + @@tcxflag['mpost'] = true + @@tcxflag['pdfetex'] = true + @@tcxflag['pdftex'] = true + @@tcxflag['xetex'] = false @@draftoptions['pdftex'] = ['--draftmode'] @@ -540,11 +547,16 @@ class TEX "--ini" end end - def tcxflag(file="natural.tcx") - if Kpse.miktex? then - "-tcx=#{file}" + def tcxflag(engine) + if @@tcxflag[engine] then + file = "natural.tcx" + if Kpse.miktex? 
then + "-tcx=#{file}" + else + "-translate-file=#{file}" + end else - "-translate-file=#{file}" + "" end end @@ -661,7 +673,7 @@ class TEX texformats.each do |texformat| report("generating tex format #{texformat}") progname = validprogname([getvariable('progname'),texformat,texengine]) - runcommand([quoted(texengine),prognameflag(progname),iniflag,tcxflag,prefixed(texformat,texengine),texmakeextras(texformat)]) + runcommand([quoted(texengine),prognameflag(progname),iniflag,tcxflag(texengine),prefixed(texformat,texengine),texmakeextras(texformat)]) end end else @@ -682,7 +694,7 @@ class TEX mpsformats.each do |mpsformat| report("generating mps format #{mpsformat}") progname = validprogname([getvariable('progname'),mpsformat,mpsengine]) - if not runcommand([quoted(mpsengine),prognameflag(progname),iniflag,tcxflag,runoptions(mpsengine),mpsformat,mpsmakeextras(mpsformat)]) then + if not runcommand([quoted(mpsengine),prognameflag(progname),iniflag,tcxflag(mpsengine),runoptions(mpsengine),mpsformat,mpsmakeextras(mpsformat)]) then setvariable('error','no format made') end end @@ -1559,7 +1571,7 @@ end run_luatools("--fmt=#{texformat} #{filename}") else progname = validprogname([getvariable('progname'),texformat,texengine]) - runcommand([quoted(texengine),prognameflag(progname),formatflag(texengine,texformat),tcxflag,runoptions(texengine),filename,texprocextras(texformat)]) + runcommand([quoted(texengine),prognameflag(progname),formatflag(texengine,texformat),tcxflag(texengine),runoptions(texengine),filename,texprocextras(texformat)]) end # true else @@ -1574,8 +1586,7 @@ end if mpsengine && mpsformat then ENV["MPXCOMMAND"] = "0" unless mpx progname = validprogname([getvariable('progname'),mpsformat,mpsengine]) - runcommand([quoted(mpsengine),prognameflag(progname),formatflag(mpsengine,mpsformat),tcxflag,runoptions(mpsengine),mpname,mpsprocextras(mpsformat)]) - # 
runcommand([quoted(mpsengine),formatflag(mpsengine,mpsformat),tcxflag,runoptions(mpsengine),mpname,mpsprocextras(mpsformat)]) + runcommand([quoted(mpsengine),prognameflag(progname),formatflag(mpsengine,mpsformat),tcxflag(mpsengine),runoptions(mpsengine),mpname,mpsprocextras(mpsformat)]) true else false @@ -1790,10 +1801,10 @@ end forcexml = getvariable('forcexml') -if dummyfile || forcexml then # after ctx? - jobsuffix = makestubfile(rawname,rawbase,forcexml) - checkxmlfile(rawname) -end + if dummyfile || forcexml then # after ctx? + jobsuffix = makestubfile(rawname,rawbase,forcexml) + checkxmlfile(rawname) + end # preprocess files @@ -1929,7 +1940,15 @@ end end end # goto . + ok = runtex(File.suffixed(if dummyfile || forcexml then rawbase else rawname end,jobsuffix)) + +if getvariable('texengine') == "xetex" then + ok = true +end + +############################ + # goto tmp/jobname when present if ok && (nofruns > 1) then unless getvariable('nompmode') then diff --git a/scripts/context/ruby/base/texutil.rb b/scripts/context/ruby/base/texutil.rb index 726e31381..9c43f00e9 100644 --- a/scripts/context/ruby/base/texutil.rb +++ b/scripts/context/ruby/base/texutil.rb @@ -400,7 +400,7 @@ class TeXUtil def MyCommands::writer(logger,handle) handle << logger.banner("commands: #{@@commands.size}") @@commands.each do |c| - handle << "#{c}\n" + handle << "#{c}%\n" end end @@ -494,7 +494,7 @@ class TeXUtil end end list.each do |entry| - handle << "\\synonymentry{#{entry.type}}{#{entry.command}}{#{entry.key}}{#{entry.data}}\n" + handle << "\\synonymentry{#{entry.type}}{#{entry.command}}{#{entry.key}}{#{entry.data}}%\n" end end @@ -602,7 +602,7 @@ class TeXUtil end else # @entry, @key = cleanupsplit(@entry), cleanupsplit(@key) -@entry, @key = cleanupsplit(@entry), xcleanupsplit(@key) + @entry, @key = cleanupsplit(@entry), xcleanupsplit(@key) end @sortkey = sorter.simplify(@key) # special = @sortkey =~ /^([^a-zA-Z\\])/o @@ -632,23 +632,23 @@ class TeXUtil end end -def 
xcleanupsplit(target) # +a+b+c &a&b&c a+b+c a&b&c - t = Array.new - case target[0,1] - when '&' then - t = target.sub(/^./o,'').split(/([^\\])\&/o) - when '+' then - t = target.sub(/^./o,'').split(/([^\\])\+/o) - else - # t = target.split(/([^\\])[\&\+]/o) - # t = target.split(/[\&\+]/o) - t = target.split(/(?!\\)[\&\+]/o) # lookahead - end - if not t[1] then t[1] = " " end # we need some entry else we get subentries first - if not t[2] then t[2] = " " end # we need some entry else we get subentries first - return t.join(@@split) -end - + def xcleanupsplit(target) # +a+b+c &a&b&c a+b+c a&b&c + t = Array.new + case target[0,1] + when '&' then + t = target.sub(/^./o,'').split(/([^\\])\&/o) + when '+' then + t = target.sub(/^./o,'').split(/([^\\])\+/o) + else + # t = target.split(/([^\\])[\&\+]/o) + # t = target.split(/[\&\+]/o) + t = target.split(/(?!\\)[\&\+]/o) # lookahead + end + if not t[1] then t[1] = " " end # we need some entry else we get subentries first + if not t[2] then t[2] = " " end # we need some entry else we get subentries first + if not t[3] then t[3] = " " end # we need some entry else we get subentries first + return t.join(@@split) + end def <=> (other) @sortkey <=> other.sortkey end @@ -661,10 +661,10 @@ end def Register.flushsavedline(handle) if @@collapse && ! @@savedfrom.empty? then if ! @@savedto.empty? 
then - handle << "\\registerfrom#{@@savedfrom}" - handle << "\\registerto#{@@savedto}" + handle << "\\registerfrom#{@@savedfrom}%" + handle << "\\registerto#{@@savedto}%" else - handle << "\\registerpage#{@@savedfrom}" + handle << "\\registerpage#{@@savedfrom}%" end end @@savedhowto, @@savedfrom, @@savedto, @@savedentry = '', '', '', '' @@ -706,15 +706,15 @@ end elsif alpha == @@specialsymbol then character = @@specialbanner elsif alpha.length > 1 then - # character = "\\getvalue\{#{alpha}\}" - character = "\\#{alpha}" + # character = "\\getvalue\{#{alpha}\}%" + character = "\\#{alpha}%" else character = "\\unknown" end - handle << "\\registerentry{#{entry.type}}{#{character}}\n" + handle << "\\registerentry{#{entry.type}}{#{character}}%\n" end end - current = [entry.entry.split(@@split),'','',''].flatten + current = [entry.entry.split(@@split),'','','',''].flatten howto = current.collect do |e| e + '::' + entry.texthowto end @@ -724,38 +724,51 @@ end previous[0] = howto[0].dup previous[1] = '' previous[2] = '' + previous[3] = '' end if howto[1] == previous[1] then current[1] = '' else previous[1] = howto[1].dup previous[2] = '' + previous[3] = '' end if howto[2] == previous[2] then current[2] = '' else previous[2] = howto[2].dup + previous[3] = '' + end + if howto[3] == previous[3] then + current[3] = '' + else + previous[3] = howto[3].dup end copied = false unless current[0].empty? then Register.flushsavedline(handle) - handle << "\\registerentrya{#{entry.type}}{#{current[0]}}\n" + handle << "\\registerentrya{#{entry.type}}{#{current[0]}}%\n" copied = true end unless current[1].empty? then Register.flushsavedline(handle) - handle << "\\registerentryb{#{entry.type}}{#{current[1]}}\n" + handle << "\\registerentryb{#{entry.type}}{#{current[1]}}%\n" copied = true end unless current[2].empty? 
then Register.flushsavedline(handle) - handle << "\\registerentryc{#{entry.type}}{#{current[2]}}\n" + handle << "\\registerentryc{#{entry.type}}{#{current[2]}}%\n" + copied = true + end + unless current[3].empty? then + Register.flushsavedline(handle) + handle << "\\registerentryd{#{entry.type}}{#{current[3]}}%\n" copied = true end @nofentries += 1 if copied if entry.realpage.to_i == 0 then Register.flushsavedline(handle) - handle << "\\registersee{#{entry.type}}{#{entry.pagehowto},#{entry.texthowto}}{#{entry.seetoo}}{#{entry.page}}\n" ; + handle << "\\registersee{#{entry.type}}{#{entry.pagehowto},#{entry.texthowto}}{#{entry.seetoo}}{#{entry.page}}%\n" ; lastpage, lastrealpage = entry.page, entry.realpage copied = false # no page ! elsif @@savedhowto != entry.pagehowto and ! entry.pagehowto.empty? then @@ -763,14 +776,14 @@ end end # beware, we keep multiple page entries per realpage because of possible prefix usage if copied || ! ((lastpage == entry.page) && (lastrealpage == entry.realpage)) then - nextentry = "{#{entry.type}}{#{previous[0]}}{#{previous[1]}}{#{previous[2]}}{#{entry.pagehowto},#{entry.texthowto}}" + nextentry = "{#{entry.type}}{#{previous[0]}}{#{previous[1]}}{#{previous[2]}}{#{previous[3]}}{#{entry.pagehowto},#{entry.texthowto}}" savedline = "{#{entry.type}}{#{@@savedhowto},#{entry.texthowto}}{#{entry.location}}{#{entry.page}}{#{entry.realpage}}" if entry.state == 1 then # from Register.flushsavedline(handle) - handle << "\\registerfrom#{savedline}\n" + handle << "\\registerfrom#{savedline}%\n" elsif entry.state == 3 then # to Register.flushsavedline(handle) - handle << "\\registerto#{savedline}\n" + handle << "\\registerto#{savedline}%\n" @@savedhowto = '' # test elsif @@collapse then if savedentry != nextentry then @@ -779,7 +792,7 @@ end savedTo, savedentry = savedline, nextentry end else - handle << "\\registerpage#{savedline}\n" + handle << "\\registerpage#{savedline}%\n" @@savedhowto = '' # test end @nofpages += 1 @@ -1027,6 +1040,7 @@ end 
begin if f = File.open(File.suffixed(filename,'tuo'),'w') then @plugins.writers(f) + f << "\\endinput\n" f.close end rescue diff --git a/scripts/context/ruby/www/dir.rb b/scripts/context/ruby/www/dir.rb index 09e088d77..115fd8911 100644 --- a/scripts/context/ruby/www/dir.rb +++ b/scripts/context/ruby/www/dir.rb @@ -62,9 +62,9 @@ class WWW end u = dir_uri(@variables.get('path') || '.') str << "
\n
\n"
-                        str << "name".ljust(49+u.length)
-                        str << "last modified".ljust(41+u.length)
-                        str << "size".rjust(31+u.length) << "\n" << "\n"
+                        str << "name".ljust(49+u.length)
+                        str << "last modified".ljust(41+u.length)
+                        str << "size".rjust(31+u.length) << "\n" << "\n"
                         # parent path
                         if showdirs && ! hidden.include?('..') then
                             dname = "parent directory"
-- 
cgit v1.2.3