diff options
author | Hans Hagen <pragma@wxs.nl> | 2009-10-28 19:27:00 +0100 |
---|---|---|
committer | Hans Hagen <pragma@wxs.nl> | 2009-10-28 19:27:00 +0100 |
commit | 17edf6ae96ce4e5e8eeaadc47bb3abc321fc2f6e (patch) | |
tree | 1f8e2a694ecbf03a6a534cd523fbbf647bf91055 /scripts | |
parent | 9da04bd6fa363277da00d82aa369e51a2e4202ae (diff) | |
download | context-17edf6ae96ce4e5e8eeaadc47bb3abc321fc2f6e.tar.gz |
beta 2009.10.28 19:27
Diffstat (limited to 'scripts')
-rw-r--r-- | scripts/context/lua/mtxrun.lua | 173 | ||||
-rw-r--r-- | scripts/context/lua/scite-ctx.lua | 193 | ||||
-rw-r--r-- | scripts/context/stubs/mswin/mtxrun.lua | 173 | ||||
-rwxr-xr-x | scripts/context/stubs/unix/mtxrun | 173 |
4 files changed, 436 insertions, 276 deletions
diff --git a/scripts/context/lua/mtxrun.lua b/scripts/context/lua/mtxrun.lua index cbb27098d..7507b5e6e 100644 --- a/scripts/context/lua/mtxrun.lua +++ b/scripts/context/lua/mtxrun.lua @@ -1919,6 +1919,7 @@ function file.collapse_path(str) return str end +--~ print(file.collapse_path("/a")) --~ print(file.collapse_path("a/./b/..")) --~ print(file.collapse_path("a/aa/../b/bb")) --~ print(file.collapse_path("a/../..")) @@ -3517,8 +3518,9 @@ element.</p> local nsremap, resolvens = xml.xmlns, xml.resolvens -local stack, top, dt, at, xmlns, errorstr, entities = {}, {}, {}, {}, {}, nil, {} +local stack, top, dt, at, xmlns, errorstr, entities = { }, { }, { }, { }, { }, nil, { } local strip, cleanup, utfize, resolve = false, false, false, false +local dcache, hcache, acache = { }, { }, { } local mt = { } @@ -3638,8 +3640,6 @@ local function attribute_specification_error(str) return str end -local dcache, hcache, acache = { }, { }, { } - function xml.unknown_dec_entity_format(str) return format("&%s;", str) end function xml.unknown_hex_entity_format(str) return format("&#x%s;",str) end function xml.unknown_any_entity_format(str) return format("&%s;", str) end @@ -3659,12 +3659,13 @@ local function handle_hex_entity(str) if trace_entities then logs.report("xml","found entity &#x%s;",str) end - h = "&#" .. str .. ";" + h = "&#c" .. str .. ";" end hcache[str] = h end return h end + local function handle_dec_entity(str) local d = dcache[str] if not d then @@ -3680,30 +3681,77 @@ local function handle_dec_entity(str) if trace_entities then logs.report("xml","found entity &#%s;",str) end - d = "&" .. str .. ";" + d = "&#" .. str .. ";" end dcache[str] = d end return d end + +-- one level expansion (simple case) + +local function fromhex(s) + local n = tonumber(s,16) + if n then + return utfchar(n) + else + return format("h:%s",s), true + end +end + +local function fromdec(s) + local n = tonumber(s) + if n then + return utfchar(n) + else + return format("d:%s",s), true + end +end + +local P, S, R, C, V, Cs = lpeg.P, lpeg.S, lpeg.R, lpeg.C, lpeg.V, lpeg.Cs + +local rest = (1-P(";"))^0 +local many = P(1)^0 + +local parsedentity = + P("&") * (P("#x")*(rest/fromhex) + P("#")*(rest/fromdec)) * P(";") * P(-1) + + (P("#x")*(many/fromhex) + P("#")*(many/fromdec)) + +xml.parsedentitylpeg = parsedentity + local function handle_any_entity(str) if resolve then - local a = entities[str] -- per instance ! + local a = acache[str] -- per instance ! todo if not a then - a = acache[str] - if not a then + if type(resolve) == "function" then + a = resolve(str) or entities[str] + else + a = entities[str] + end + if a then if trace_entities then - logs.report("xml","ignoring entity &%s;",str) + logs.report("xml","resolved entity &%s; -> %s (internal)",str,a) + end + a = parsedentity:match(a) or a + else + if xml.unknown_any_entity_format then + a = xml.unknown_any_entity_format(str) or "" + end + if a then + if trace_entities then + logs.report("xml","resolved entity &%s; -> %s (external)",str,a) + end else - -- can be defined in a global mapper and intercepted elsewhere - -- as happens in lxml-tex.lua + if trace_entities then + logs.report("xml","keeping entity &%s;",str) + end + a = "&" .. str .. ";" end - a = xml.unknown_any_entity_format(str) or "" - acache[str] = a end + acache[str] = a elseif trace_entities then if not acache[str] then - logs.report("xml","converting entity &%s; into %s",str,r) + logs.report("xml","converting entity &%s; into %s",str,a) acache[str] = a end end @@ -3721,8 +3769,6 @@ local function handle_any_entity(str) end end -local P, S, R, C, V, Cs = lpeg.P, lpeg.S, lpeg.R, lpeg.C, lpeg.V, lpeg.Cs - local space = S(' \r\n\t') local open = P('<') local close = P('>') @@ -3744,12 +3790,11 @@ local utfbom = P('\000\000\254\255') + P('\255\254\000\000') + local spacing = C(space^0) local entitycontent = (1-open-semicolon)^0 -local entity = ampersand/"" * ( - P("#")/"" * ( +local parsedentity = P("#")/"" * ( P("x")/"" * (entitycontent/handle_hex_entity) + (entitycontent/handle_dec_entity) ) + (entitycontent/handle_any_entity) - ) * (semicolon/"") +local entity = ampersand/"" * parsedentity * (semicolon/"") local text_unparsed = C((1-open)^1) local text_parsed = Cs(((1-open-ampersand)^1 + entity)^1) @@ -3848,7 +3893,8 @@ local function xmlconvert(data, settings) utfize = settings.utfize_entities resolve = settings.resolve_entities cleanup = settings.text_cleanup - stack, top, at, xmlns, errorstr, result, entities = {}, {}, {}, {}, nil, nil, settings.entities or {} + stack, top, at, xmlns, errorstr, result, entities = { }, { }, { }, { }, nil, nil, settings.entities or { } + acache, hcache, dcache = { }, { }, { } -- not stored reported_attribute_errors = { } if settings.parent_root then mt = getmetatable(settings.parent_root) @@ -4572,9 +4618,9 @@ apply_axis['descendant-or-self'] = function(list) local collected = { } for l=1,#list do local ll = list[l] -if ll.special ~= true then -- catch double root - collected[#collected+1] = ll -end + if ll.special ~= true then -- catch double root + collected[#collected+1] = ll + end collect(ll,collected) end return collected @@ -4663,7 +4709,7 @@ local function apply_nodes(list,directive,nodes) return { } end else - local collected = { } + local collected, m, p = { }, 0, nil if not nns then -- only check tag for l=1,#list do local ll = list[l] @@ -4671,10 +4717,12 @@ local function apply_nodes(list,directive,nodes) if ltg then if directive then if ntg == ltg then - collected[#collected+1] = ll + local llp = ll.__p__ ; if llp ~= p then p, m = llp, 1 else m = m + 1 end + collected[#collected+1], ll.mi = ll, m end elseif ntg ~= ltg then - collected[#collected+1] = ll + local llp = ll.__p__ ; if llp ~= p then p, m = llp, 1 else m = m + 1 end + collected[#collected+1], ll.mi = ll, m end end end @@ -4685,10 +4733,12 @@ local function apply_nodes(list,directive,nodes) if lns then if directive then if lns == nns then - collected[#collected+1] = ll + local llp = ll.__p__ ; if llp ~= p then p, m = llp, 1 else m = m + 1 end + collected[#collected+1], ll.mi = ll, m end elseif lns ~= nns then - collected[#collected+1] = ll + local llp = ll.__p__ ; if llp ~= p then p, m = llp, 1 else m = m + 1 end + collected[#collected+1], ll.mi = ll, m end end end @@ -4701,10 +4751,12 @@ local function apply_nodes(list,directive,nodes) local ok = ltg == ntg and lns == nns if directive then if ok then - collected[#collected+1] = ll + local llp = ll.__p__ ; if llp ~= p then p, m = llp, 1 else m = m + 1 end + collected[#collected+1], ll.mi = ll, m end elseif not ok then - collected[#collected+1] = ll + local llp = ll.__p__ ; if llp ~= p then p, m = llp, 1 else m = m + 1 end + collected[#collected+1], ll.mi = ll, m end end end @@ -4712,7 +4764,7 @@ local function apply_nodes(list,directive,nodes) return collected end else - local collected = { } + local collected, m, p = { }, 0, nil for l=1,#list do local ll = list[l] local ltg = ll.tg @@ -4728,10 +4780,12 @@ local function apply_nodes(list,directive,nodes) end if directive then if ok then - collected[#collected+1] = ll + local llp = ll.__p__ ; if llp ~= p then p, m = llp, 1 else m = m + 1 end + collected[#collected+1], ll.mi = ll, m end elseif not ok then - collected[#collected+1] = ll + local llp = ll.__p__ ; if llp ~= p then p, m = llp, 1 else m = m + 1 end + collected[#collected+1], ll.mi = ll, m end end end @@ -4752,31 +4806,29 @@ end local P, V, C, Cs, Cc, Ct, R, S, Cg, Cb = lpeg.P, lpeg.V, lpeg.C, lpeg.Cs, lpeg.Cc, lpeg.Ct, lpeg.R, lpeg.S, lpeg.Cg, lpeg.Cb -local spaces = S(" \n\r\t\f")^0 - -local lp_space = S(" \n\r\t\f") -local lp_any = P(1) - -local lp_noequal = P("!=") / "~=" + P("<=") + P(">=") + P("==") -local lp_doequal = P("=") / "==" -local lp_or = P("|") / " or " -local lp_and = P("&") / " and " +local spaces = S(" \n\r\t\f")^0 +local lp_space = S(" \n\r\t\f") +local lp_any = P(1) +local lp_noequal = P("!=") / "~=" + P("<=") + P(">=") + P("==") +local lp_doequal = P("=") / "==" +local lp_or = P("|") / " or " +local lp_and = P("&") / " and " local lp_builtin = P ( P("first") / "1" + P("last") / "#list" + P("position") / "l" + P("rootposition") / "order" + - P("index") / "ll.ni" + + P("index") / "(ll.ni or 1)" + + P("match") / "(ll.mi or 1)" + P("text") / "(ll.dt[1] or '')" + P("name") / "(ll.ns~='' and ll.ns..':'..ll.tg)" + P("tag") / "ll.tg" + P("ns") / "ll.ns" ) * ((spaces * P("(") * spaces * P(")"))/"") -local lp_attribute = (P("@") + P("attribute::")) / "" * Cc("(ll.at and ll.at['") * R("az","AZ","--","__")^1 * Cc("'])") -local lp_fastpos = ((R("09","--","++")^1 * P(-1)) / function(s) return "l==" .. s end) - +local lp_attribute = (P("@") + P("attribute::")) / "" * Cc("(ll.at and ll.at['") * R("az","AZ","--","__")^1 * Cc("'])") +local lp_fastpos = ((R("09","--","++")^1 * P(-1)) / function(s) return "l==" .. s end) local lp_reserved = C("and") + C("or") + C("not") + C("div") + C("mod") + C("true") + C("false") local lp_lua_function = C(R("az","AZ","__")^1 * (P(".") * R("az","AZ","__")^1)^1) * ("(") / function(t) -- todo: better . handling @@ -4797,9 +4849,9 @@ local noparent = 1 - (lparent+rparent) local nested = lpeg.P{lparent * (noparent + lpeg.V(1))^0 * rparent} local value = lpeg.P(lparent * lpeg.C((noparent + nested)^0) * rparent) -- lpeg.P{"("*C(((1-S("()"))+V(1))^0)*")"} -local lp_child = Cc("expr.child(e,'") * R("az","AZ","--","__")^1 * Cc("')") -local lp_string = Cc("'") * R("az","AZ","--","__")^1 * Cc("'") -local lp_content= (P("'") * (1-P("'"))^0 * P("'") + P('"') * (1-P('"'))^0 * P('"')) +local lp_child = Cc("expr.child(e,'") * R("az","AZ","--","__")^1 * Cc("')") +local lp_string = Cc("'") * R("az","AZ","--","__")^1 * Cc("'") +local lp_content = (P("'") * (1-P("'"))^0 * P("'") + P('"') * (1-P('"'))^0 * P('"')) local cleaner @@ -4941,7 +4993,9 @@ local parser = Ct { "patterns", -- can be made a bit faster by moving pattern ou protocol = Cg(V("letters"),"protocol") * P("://") + Cg(Cc(nil),"protocol"), - step = (V("shortcuts") + V("axis") * spaces * V("nodes")^0 + V("error")) * spaces * V("expressions")^0 * spaces * V("finalizer")^0, + -- the / is needed for // as descendant or self is somewhat special + -- step = (V("shortcuts") + V("axis") * spaces * V("nodes")^0 + V("error")) * spaces * V("expressions")^0 * spaces * V("finalizer")^0, + step = ((V("shortcuts") + P("/") + V("axis")) * spaces * V("nodes")^0 + V("error")) * spaces * V("expressions")^0 * spaces * V("finalizer")^0, axis = V("descendant") + V("child") + V("parent") + V("self") + V("root") + V("ancestor") + V("descendant_or_self") + V("following") + V("following_sibling") + @@ -4956,13 +5010,14 @@ local parser = Ct { "patterns", -- can be made a bit faster by moving pattern ou shortcuts = V("shortcuts_a") * (spaces * "/" * spaces * V("shortcuts_a"))^0, - s_descendant_or_self = P("/") * Cc(register_descendant_or_self), - s_descendant = P("**") * Cc(register_descendant), - s_child = P("*") * Cc(register_child ), - s_parent = P("..") * Cc(register_parent ), - s_self = P("." ) * Cc(register_self ), - s_root = P("^^") * Cc(register_root ), - s_ancestor = P("^") * Cc(register_ancestor ), + s_descendant_or_self = (P("***/") + P("/")) * Cc(register_descendant_or_self), --- *** is a bonus + -- s_descendant_or_self = P("/") * Cc(register_descendant_or_self), + s_descendant = P("**") * Cc(register_descendant), + s_child = P("*") * #(1-P(":")) * Cc(register_child ), + s_parent = P("..") * Cc(register_parent ), + s_self = P("." ) * Cc(register_self ), + s_root = P("^^") * Cc(register_root ), + s_ancestor = P("^") * Cc(register_ancestor ), descendant = P("descendant::") * Cc(register_descendant ), child = P("child::") * Cc(register_child ), @@ -5154,7 +5209,7 @@ local function traced_apply(list,parsed,nofparsed) logs.report("lpath", "% 10i : ns : %s",(collected and #collected) or 0,nodesettostring(pi.nodes,pi.nodetest)) elseif kind == "expression" then collected = apply_expression(collected,pi.evaluator,i) - logs.report("lpath", "% 10i : ex : %s",(collected and #collected) or 0,pi.expression) + logs.report("lpath", "% 10i : ex : %s -> %s",(collected and #collected) or 0,pi.expression,pi.converted) elseif kind == "finalizer" then collected = pi.finalizer(collected) logs.report("lpath", "% 10i : fi : %s : %s(%s)",(collected and #collected) or 0,parsed.protocol or xml.defaultprotocol,pi.name,pi.arguments or "") @@ -6091,7 +6146,7 @@ end local function text(collected) if collected then - return xmltostring(collected[1]) -- only first as we cannot concat function + return xmltostring(collected[1].dt) -- only first as we cannot concat function else return "" end diff --git a/scripts/context/lua/scite-ctx.lua b/scripts/context/lua/scite-ctx.lua index 1b8329289..fb10ce87d 100644 --- a/scripts/context/lua/scite-ctx.lua +++ b/scripts/context/lua/scite-ctx.lua @@ -68,6 +68,11 @@ -- generic functions +props = props or { } setmetatable(props,{ __index = function(k,v) props[k] = "unknown" return "unknown" end } ) + +local byte, lower, upper, gsub, sub, find, rep, match, gmatch = string.byte, string.lower, string.upper, string.gsub, string.sub, string.find, string.rep, string.match, string.gmatch +local sort, concat = table.sort, table.concat + local crlf = "\n" function traceln(str) @@ -75,26 +80,6 @@ function traceln(str) io.flush() end -function table.found(tab, str) - local l, r, p - if #str == 0 then - return false - else - l, r = 1, #tab - while l <= r do - p = math.floor((l+r)/2) - if str < tab[p] then - r = p - 1 - elseif str > tab[p] then - l = p + 1 - else - return true - end - end - return false - end -end - function string:grab(delimiter) local list = {} for snippet in self:gmatch(delimiter) do @@ -103,10 +88,6 @@ function string:grab(delimiter) return list end -function string:is_empty() - return not self:find("%S") -end - function string:expand() return (self:gsub("ENV%((%w+)%)", os.envvar)) end @@ -115,24 +96,18 @@ function string:strip() return (self:gsub("^%s*(.-)%s*$", "%1")) end -do - - local lower, gsub, sub = string.lower, string.gsub, string.sub - - function table.alphasort(list,i) - if i and i > 0 then - local function alphacmp(a,b) - return lower(gsub(sub(a,i),'0',' ')) < lower(gsub(sub(b,i),'0',' ')) - end - table.sort(list,alphacmp) - else - local function alphacmp(a,b) - return a:lower() < b:lower() - end - table.sort(list,alphacmp) +function table.alphasort(list,i) + if i and i > 0 then + local function alphacmp(a,b) + return lower(gsub(sub(a,i),'0',' ')) < lower(gsub(sub(b,i),'0',' ')) end + sort(list,alphacmp) + else + local function alphacmp(a,b) + return lower(a) < lower(b) + end + sort(list,alphacmp) end - end function io.exists(filename) @@ -150,11 +125,11 @@ function os.envvar(str) if s ~= '' then return s end - s = os.getenv(str:upper()) + s = os.getenv(upper(str)) if s ~= '' then return s end - s = os.getenv(str:lower()) + s = os.getenv(lower(str)) if s ~= '' then return s end @@ -217,9 +192,9 @@ function getfiletype() return 'tex' elseif editor.Lexer == SCLEX_XML then return 'xml' - elseif firstline:find("^%%") then + elseif find(firstline,"^%%") then return 'tex' - elseif firstline:find("^<%?xml") then + elseif find(firstline,"^<%?xml") then return 'xml' else return 'unknown' @@ -233,7 +208,7 @@ function get_dir_list(mask) if props['PLAT_GTK'] and props['PLAT_GTK'] ~= "" then f = io.popen('ls -1 ' .. mask) else - mask = mask:gsub('/','\\') + mask = gsub(mask,'/','\\') local tmpfile = 'scite-ctx.tmp' local cmd = 'dir /b "' .. mask .. '" > ' .. tmpfile os.execute(cmd) @@ -257,7 +232,7 @@ do print("loading scite-ctx.lua definition file\n") print("- see scite-ctx.properties for configuring info\n") print("- ctx.spellcheck.wordpath set to " .. props['ctx.spellcheck.wordpath']) - if (props['ctx.spellcheck.wordpath']:lower()):find("ctxspellpath") then + if find(lower(props['ctx.spellcheck.wordpath']),"ctxspellpath") then if os.getenv('ctxspellpath') then print("- ctxspellpath set to " .. os.getenv('CTXSPELLPATH')) else @@ -268,7 +243,7 @@ do print("\n- ctx.wraptext.length is set to " .. props['ctx.wraptext.length']) if props['ctx.helpinfo'] ~= '' then print("\n- key bindings:\n") - print(((string.strip(props['ctx.helpinfo'])):gsub("%s*\|%s*","\n"))) + print((gsub(string.strip(props['ctx.helpinfo']),"%s*\|%s*","\n"))) end print("\n- recognized first lines:\n") print("xml <?xml version='1.0' language='nl'") @@ -280,7 +255,7 @@ end -- written while listening to Talk Talk -local magicstring = string.rep("<ctx-crlf/>", 2) +local magicstring = rep("<ctx-crlf/>", 2) function wrap_text() @@ -315,14 +290,14 @@ function wrap_text() local replacement = { } local templine = '' - local indentation = string.rep(' ',startcolumn) + local indentation = rep(' ',startcolumn) local selection = editor:GetSelText() - selection = selection:gsub("[\n\r][\n\r]","\n") - selection = selection:gsub("\n\n+",' ' .. magicstring .. ' ') - selection = selection:gsub("^%s",'') + selection = gsub(selection,"[\n\r][\n\r]","\n") + selection = gsub(selection,"\n\n+",' ' .. magicstring .. ' ') + selection = gsub(selection,"^%s",'') - for snippet in selection:gmatch("%S+") do + for snippet in gmatch(selection,"%S+") do if snippet == magicstring then replacement[#replacement+1] = templine replacement[#replacement+1] = "" @@ -338,13 +313,13 @@ function wrap_text() end replacement[#replacement+1] = templine - replacement[1] = replacement[1]:gsub("^%s+",'') + replacement[1] = gsub(replacement[1],"^%s+",'') if endcolumn == 0 then replacement[#replacement+1] = "" end - editor:ReplaceSel(table.concat(replacement,"\n")) + editor:ReplaceSel(concat(replacement,"\n")) end @@ -361,11 +336,11 @@ function unwrap_text() startposition = editor.SelectionStart endposition = editor.SelectionEnd - local magicstring = string.rep("<multiplelines/>", 2) - local selection = string.gsub(editor:GetSelText(),"[\n\r][\n\r]+", ' ' .. magicstring .. ' ') + local magicstring = rep("<multiplelines/>", 2) + local selection = gsub(editor:GetSelText(),"[\n\r][\n\r]+", ' ' .. magicstring .. ' ') local replacement = '' - for snippet in selection:gmatch("%S+") do + for snippet in gmatch(selection,"%S+") do if snippet == magicstring then replacement = replacement .. "\n" else @@ -399,11 +374,11 @@ function sort_text() startposition = extend_to_start() endposition = extend_to_end() - local selection = string.gsub(editor:GetSelText(), "%s*$", '') + local selection = gsub(editor:GetSelText(), "%s*$", '') list = string.grab(selection,"[^\n\r]+") table.alphasort(list, startcolumn) - local replacement = table.concat(list, "\n") + local replacement = concat(list, "\n") editor:GotoPos(startposition) editor:SetSel(startposition,endposition) @@ -414,6 +389,26 @@ function sort_text() end +function uncomment_xml() + + local startposition = editor.SelectionStart + local endposition = editor.SelectionEnd + + if startposition == endposition then return end + + local startposition = editor.SelectionStart + local endposition = editor.SelectionEnd + + local selection = gsub(editor:GetSelText(), "%<%!%-%-.-%-%-%>", '') + + editor:GotoPos(startposition) + editor:SetSel(startposition,endposition) + + editor:ReplaceSel(selection) + editor:GotoPos(startposition) + +end + function document_text() local startposition = editor.SelectionStart @@ -432,25 +427,25 @@ function document_text() for i = editor:LineFromPosition(startposition), editor:LineFromPosition(endposition) do local str = editor:GetLine(i) if filetype == 'xml' then - if str:find("^<%!%-%- .* %-%->%s*$") then - replacement = replacement .. str:gsub("^<%!%-%- (.*) %-%->(%s*)$","%1\n") - elseif not str:is_empty() then - replacement = replacement .. '<!-- ' .. str:gsub("(%s*)$",'') .. " -->\n" + if find(str,"^<%!%-%- .* %-%->%s*$") then + replacement = replacement .. gsub(str,"^<%!%-%- (.*) %-%->(%s*)$","%1\n") + elseif find(str,"%S") then + replacement = replacement .. '<!-- ' .. gsub(str,"(%s*)$",'') .. " -->\n" else replacement = replacement .. str end else - if str:find("^%%D%s+$") then + if find(str,"^%%D%s+$") then replacement = replacement .. "\n" - elseif str:find("^%%D ") then - replacement = replacement .. str:gsub("^%%D ",'') + elseif find(str,"^%%D ") then + replacement = replacement .. gsub(str,"^%%D ",'') else replacement = replacement .. '%D ' .. str end end end - editor:ReplaceSel(replacement:gsub("[\n\r]$",'')) + editor:ReplaceSel(gsub(replacement,"[\n\r]$",'')) end @@ -467,10 +462,10 @@ function quote_text() end local replacement = editor:GetSelText() - replacement = replacement.gsub("\`\`(.-)\'\'", leftquotation .. "%1" .. rightquotation) - replacement = replacement.gsub("\"(.-)\"", leftquotation .. "%1" .. rightquotation) - replacement = replacement.gsub("\`(.-)\'", leftquote .. "%1" .. rightquote ) - replacement = replacement.gsub("\'(.-)\'", leftquote .. "%1" .. rightquote ) + replacement = gsub(replacement,"\`\`(.-)\'\'", leftquotation .. "%1" .. rightquotation) + replacement = gsub(replacement,"\"(.-)\"", leftquotation .. "%1" .. rightquotation) + replacement = gsub(replacement,"\`(.-)\'", leftquote .. "%1" .. rightquote ) + replacement = gsub(replacement,"\'(.-)\'", leftquote .. "%1" .. rightquote ) editor:ReplaceSel(replacement) end @@ -480,9 +475,9 @@ function compound_text() local filetype = getfiletype() if filetype == 'xml' then - editor:ReplaceSel(string.gsub(editor:GetSelText(),"(>[^<%-][^<%-]+)([-\/])(%w%w+)","%1<compound token='%2'/>%3")) + editor:ReplaceSel(gsub(editor:GetSelText(),"(>[^<%-][^<%-]+)([-\/])(%w%w+)","%1<compound token='%2'/>%3")) else - editor:ReplaceSel(string.gsub(editor:GetSelText(),"([^\|])([-\/]+)([^\|])","%1|%2|%3")) + editor:ReplaceSel(gsub(editor:GetSelText(),"([^\|])([-\/]+)([^\|])","%1|%2|%3")) end end @@ -517,7 +512,7 @@ local worddone = 0 -- local command = "kpsewhich" .. progflag .. typeflag .. " " .. filename .. " > " .. tempname -- os.execute(command) -- for line in io.lines(tempname) do --- return string.gsub(line, "\s*$", '') +-- return gsub(line, "\s*$", '') -- end -- end @@ -543,9 +538,9 @@ function check_text() if props["ctx.spellcheck.language"] == 'auto' then if filetype == 'tex' then -- % version =1.0 language=uk - firstline = firstline:gsub("^%%%s*",'') - firstline = firstline:gsub("%s*$",'') - for key, val in firstline:gmatch("(%w+)=(%w+)") do + firstline = gsub(firstline,"^%%%s*",'') + firstline = gsub(firstline,"%s*$",'') + for key, val in gmatch(firstline,"(%w+)=(%w+)") do if key == "language" then language = val traceln("auto document language " .. "'" .. language .. "' (tex)") @@ -554,9 +549,9 @@ function check_text() skipfirst = true elseif filetype == 'xml' then -- <?xml version='1.0' language='uk' ?> - firstline = firstline:gsub("^%<%?xml%s*", '') - firstline = firstline:gsub("%s*%?%>%s*$", '') - for key, val in firstline:gmatch("(%w+)=[\"\'](.-)[\"\']") do + firstline = gsub(firstline,"^%<%?xml%s*", '') + firstline = gsub(firstline,"%s*%?%>%s*$", '') + for key, val in gmatch(firstline,"(%w+)=[\"\'](.-)[\"\']") do if key == "language" then language = val traceln("auto document language " .. "'" .. language .. "' (xml)") @@ -573,15 +568,15 @@ function check_text() if fname ~= '' and fname ~= wordfile then wordfile, worddone, wordlist = fname, 0, {} - for filename in wordfile:gmatch("[^%,]+") do + for filename in gmatch(wordfile,"[^%,]+") do if wordpath ~= '' then filename = string.expand(wordpath) .. '/' .. filename end if io.exists(filename) then traceln("loading " .. filename) for line in io.lines(filename) do - if not line:find("^[\%\#\-]") then - str = line:gsub("%s*$", '') + if not find(line,"^[\%\#\-]") then + str = gsub(line,"%s*$", '') rawset(wordlist,str,true) worddone = worddone + 1 end @@ -612,7 +607,7 @@ function check_text() elseif ch == wordskip then skip = true end - if ch:find("%w") and not ch:find("%d") then + if find(ch,"%w") and not find(ch,"%d") then if not skip then if ok then endpos = k @@ -627,7 +622,7 @@ function check_text() if len >= wordsize then snippet = editor:textrange(startpos,endpos+1) i = i + 1 - if wordlist[snippet] or wordlist[snippet:lower()] then -- table.found(wordlist,snippet) + if wordlist[snippet] or wordlist[lower(snippet)] then j = j + 1 else editor:StartStyling(startpos,INDICS_MASK) @@ -660,19 +655,19 @@ function UserListShow(menutrigger, menulist) menuactions = {} for i=1, #list do if list[i] ~= '' then - for key, val in list[i]:gmatch("%s*(.+)=(.+)%s*") do + for key, val in gmatch(list[i],"%s*(.+)=(.+)%s*") do menuentries[#menuentries+1] = key menuactions[key] = val end end end - local menustring = table.concat(menuentries,'|') + local menustring = concat(menuentries,'|') if menustring == "" then traceln("There are no templates defined for this file type.") else - editor.AutoCSeparator = string.byte('|') + editor.AutoCSeparator = byte('|') editor:UserListShow(menutrigger,menustring) - editor.AutoCSeparator = string.byte(' ') + editor.AutoCSeparator = byte(' ') end end @@ -693,7 +688,7 @@ function show_menu(menulist) end function process_menu(action) - if not action:find("%(%)$") then + if not find(action,"%(%)$") then assert(loadstring(action .. "()"))() else assert(loadstring(action))() @@ -731,7 +726,7 @@ function insert_template(templatelist) if not ctx_path_done[path] or rescan then local pattern = "*.*" for i, pathname in ipairs(ctx_template_paths) do - print("scanning " .. path:gsub("\\","/") .. "/" .. pathname) + print("scanning " .. gsub(path,"\\","/") .. "/" .. pathname) ctx_path_name[path] = pathname ctx_path_list[path] = get_dir_list(pathname .. "/" .. pattern) if ctx_list_loaded(path) then @@ -750,9 +745,9 @@ function insert_template(templatelist) local pattern = "%." .. suffix .. "$" local n = 0 for j, filename in ipairs(ctx_path_list[path]) do - if filename:find(pattern) then + if find(filename,pattern) then n = n + 1 - local menuname = filename:gsub("%..-$","") + local menuname = gsub(filename,"%..-$","") if ctx_template_list ~= "" then ctx_template_list = ctx_template_list .. "|" end @@ -789,7 +784,7 @@ function process_template_one(action) if ctx_auto_templates then local f = io.open(action,"r") if f then - text = string.gsub(f:read("*all"),"\n$","") + text = gsub(f:read("*all"),"\n$","") f:close() else print("unable to auto load template file " .. text) @@ -806,7 +801,7 @@ function process_template_one(action) else local f = io.open(text,"r") if f then - text = string.gsub(f:read("*all"),"\n$","") + text = gsub(f:read("*all"),"\n$","") f:close() else print("unable to load template file " .. text) @@ -815,9 +810,9 @@ function process_template_one(action) end end if text then - text = text:gsub("\\n","\n") - local pos = text:find("%?") - text = text:gsub("%?","") + text = gsub(text,"\\n","\n") + local pos = find(text,"%?") + text = gsub(text,"%?","") editor:insert(editor.CurrentPos,text) if pos then editor.CurrentPos = editor.CurrentPos + pos - 1 diff --git a/scripts/context/stubs/mswin/mtxrun.lua b/scripts/context/stubs/mswin/mtxrun.lua index cbb27098d..7507b5e6e 100644 --- a/scripts/context/stubs/mswin/mtxrun.lua +++ b/scripts/context/stubs/mswin/mtxrun.lua @@ -1919,6 +1919,7 @@ function file.collapse_path(str) return str end +--~ print(file.collapse_path("/a")) --~ print(file.collapse_path("a/./b/..")) --~ print(file.collapse_path("a/aa/../b/bb")) --~ print(file.collapse_path("a/../..")) @@ -3517,8 +3518,9 @@ element.</p> local nsremap, resolvens = xml.xmlns, xml.resolvens -local stack, top, dt, at, xmlns, errorstr, entities = {}, {}, {}, {}, {}, nil, {} +local stack, top, dt, at, xmlns, errorstr, entities = { }, { }, { }, { }, { }, nil, { } local strip, cleanup, utfize, resolve = false, false, false, false +local dcache, hcache, acache = { }, { }, { } local mt = { } @@ -3638,8 +3640,6 @@ local function attribute_specification_error(str) return str end -local dcache, hcache, acache = { }, { }, { } - function xml.unknown_dec_entity_format(str) return format("&%s;", str) end function xml.unknown_hex_entity_format(str) return format("&#x%s;",str) end function xml.unknown_any_entity_format(str) return format("&%s;", str) end @@ -3659,12 +3659,13 @@ local function handle_hex_entity(str) if trace_entities then logs.report("xml","found entity &#x%s;",str) end - h = "&#" .. str .. ";" + h = "&#c" .. str .. ";" end hcache[str] = h end return h end + local function handle_dec_entity(str) local d = dcache[str] if not d then @@ -3680,30 +3681,77 @@ local function handle_dec_entity(str) if trace_entities then logs.report("xml","found entity &#%s;",str) end - d = "&" .. str .. ";" + d = "&#" .. str .. ";" end dcache[str] = d end return d end + +-- one level expansion (simple case) + +local function fromhex(s) + local n = tonumber(s,16) + if n then + return utfchar(n) + else + return format("h:%s",s), true + end +end + +local function fromdec(s) + local n = tonumber(s) + if n then + return utfchar(n) + else + return format("d:%s",s), true + end +end + +local P, S, R, C, V, Cs = lpeg.P, lpeg.S, lpeg.R, lpeg.C, lpeg.V, lpeg.Cs + +local rest = (1-P(";"))^0 +local many = P(1)^0 + +local parsedentity = + P("&") * (P("#x")*(rest/fromhex) + P("#")*(rest/fromdec)) * P(";") * P(-1) + + (P("#x")*(many/fromhex) + P("#")*(many/fromdec)) + +xml.parsedentitylpeg = parsedentity + local function handle_any_entity(str) if resolve then - local a = entities[str] -- per instance ! + local a = acache[str] -- per instance ! todo if not a then - a = acache[str] - if not a then + if type(resolve) == "function" then + a = resolve(str) or entities[str] + else + a = entities[str] + end + if a then if trace_entities then - logs.report("xml","ignoring entity &%s;",str) + logs.report("xml","resolved entity &%s; -> %s (internal)",str,a) + end + a = parsedentity:match(a) or a + else + if xml.unknown_any_entity_format then + a = xml.unknown_any_entity_format(str) or "" + end + if a then + if trace_entities then + logs.report("xml","resolved entity &%s; -> %s (external)",str,a) + end else - -- can be defined in a global mapper and intercepted elsewhere - -- as happens in lxml-tex.lua + if trace_entities then + logs.report("xml","keeping entity &%s;",str) + end + a = "&" .. str .. ";" end - a = xml.unknown_any_entity_format(str) or "" - acache[str] = a end + acache[str] = a elseif trace_entities then if not acache[str] then - logs.report("xml","converting entity &%s; into %s",str,r) + logs.report("xml","converting entity &%s; into %s",str,a) acache[str] = a end end @@ -3721,8 +3769,6 @@ local function handle_any_entity(str) end end -local P, S, R, C, V, Cs = lpeg.P, lpeg.S, lpeg.R, lpeg.C, lpeg.V, lpeg.Cs - local space = S(' \r\n\t') local open = P('<') local close = P('>') @@ -3744,12 +3790,11 @@ local utfbom = P('\000\000\254\255') + P('\255\254\000\000') + local spacing = C(space^0) local entitycontent = (1-open-semicolon)^0 -local entity = ampersand/"" * ( - P("#")/"" * ( +local parsedentity = P("#")/"" * ( P("x")/"" * (entitycontent/handle_hex_entity) + (entitycontent/handle_dec_entity) ) + (entitycontent/handle_any_entity) - ) * (semicolon/"") +local entity = ampersand/"" * parsedentity * (semicolon/"") local text_unparsed = C((1-open)^1) local text_parsed = Cs(((1-open-ampersand)^1 + entity)^1) @@ -3848,7 +3893,8 @@ local function xmlconvert(data, settings) utfize = settings.utfize_entities resolve = settings.resolve_entities cleanup = settings.text_cleanup - stack, top, at, xmlns, errorstr, result, entities = {}, {}, {}, {}, nil, nil, settings.entities or {} + stack, top, at, xmlns, errorstr, result, entities = { }, { }, { }, { }, nil, nil, settings.entities or { } + acache, hcache, dcache = { }, { }, { } -- not stored reported_attribute_errors = { } if settings.parent_root then mt = getmetatable(settings.parent_root) @@ -4572,9 +4618,9 @@ apply_axis['descendant-or-self'] = function(list) local collected = { } for l=1,#list do local ll = list[l] -if ll.special ~= true then -- catch double root - collected[#collected+1] = ll -end + if ll.special ~= true then -- catch double root + collected[#collected+1] = ll + end collect(ll,collected) end return collected @@ -4663,7 +4709,7 @@ local function apply_nodes(list,directive,nodes) return { } end else - local collected = { } + local collected, m, p = { }, 0, nil if not nns then -- only check tag for l=1,#list do local ll = list[l] @@ -4671,10 +4717,12 @@ local function apply_nodes(list,directive,nodes) if ltg then if directive then if ntg == ltg then - collected[#collected+1] = ll + local llp = ll.__p__ ; if llp ~= p then p, m = llp, 1 else m = m + 1 end + collected[#collected+1], ll.mi = ll, m end elseif ntg ~= ltg then - collected[#collected+1] = ll + local llp = ll.__p__ ; if llp ~= p then p, m = llp, 1 else m = m + 1 end + collected[#collected+1], ll.mi = ll, m end end end @@ -4685,10 +4733,12 @@ local function apply_nodes(list,directive,nodes) if lns then if directive then if lns == nns then - collected[#collected+1] = ll + local llp = ll.__p__ ; if llp ~= p then p, m = llp, 1 else m = m + 1 end + collected[#collected+1], ll.mi = ll, m end elseif lns ~= nns then - collected[#collected+1] = ll + local llp = ll.__p__ ; if llp ~= p then p, m = llp, 1 else m = m + 1 end + collected[#collected+1], ll.mi = ll, m end end end @@ -4701,10 +4751,12 @@ local function apply_nodes(list,directive,nodes) local ok = ltg == ntg and lns == nns if directive then if ok then - collected[#collected+1] = ll + local llp = ll.__p__ ; if llp ~= p then p, m = llp, 1 else m = m + 1 end + collected[#collected+1], ll.mi = ll, m end elseif not ok then - collected[#collected+1] = ll + local llp = ll.__p__ ; if llp ~= p then p, m = llp, 1 else m = m + 1 end + collected[#collected+1], ll.mi = ll, m end end end @@ -4712,7 +4764,7 @@ local function apply_nodes(list,directive,nodes) return collected end else - local collected = { } + local collected, m, p = { }, 0, nil for l=1,#list do local ll = list[l] local ltg = ll.tg @@ -4728,10 +4780,12 @@ local function apply_nodes(list,directive,nodes) end if directive then if ok then - collected[#collected+1] = ll + local llp = ll.__p__ ; if llp ~= p then p, m = llp, 1 else m = m + 1 end + collected[#collected+1], ll.mi = ll, m end elseif not ok then - collected[#collected+1] = ll + local llp = ll.__p__ ; if llp ~= p then p, m = llp, 1 else m = m + 1 end + collected[#collected+1], ll.mi = ll, m end end end @@ -4752,31 +4806,29 @@ end local P, V, C, Cs, Cc, Ct, R, S, Cg, Cb = lpeg.P, lpeg.V, lpeg.C, lpeg.Cs, lpeg.Cc, lpeg.Ct, lpeg.R, lpeg.S, lpeg.Cg, lpeg.Cb -local spaces = S(" \n\r\t\f")^0 - -local lp_space = S(" \n\r\t\f") -local lp_any = P(1) - -local lp_noequal = P("!=") / "~=" + P("<=") + P(">=") + P("==") -local lp_doequal = P("=") / "==" -local lp_or = P("|") / " or " -local lp_and = P("&") / " and " +local spaces = S(" \n\r\t\f")^0 +local lp_space = S(" \n\r\t\f") +local lp_any = P(1) +local lp_noequal = P("!=") / "~=" + P("<=") + P(">=") + P("==") +local lp_doequal = P("=") / "==" +local lp_or = P("|") / " or " +local lp_and = P("&") / " and " local lp_builtin = P ( P("first") / "1" + P("last") / "#list" + P("position") / "l" + P("rootposition") / "order" + - P("index") / "ll.ni" + + P("index") / "(ll.ni or 1)" + + P("match") / "(ll.mi or 1)" + P("text") / "(ll.dt[1] or '')" + P("name") / "(ll.ns~='' and ll.ns..':'..ll.tg)" + P("tag") / "ll.tg" + P("ns") / "ll.ns" ) * ((spaces * P("(") * spaces * P(")"))/"") -local lp_attribute = (P("@") + P("attribute::")) / "" * Cc("(ll.at and ll.at['") * R("az","AZ","--","__")^1 * Cc("'])") -local lp_fastpos = ((R("09","--","++")^1 * P(-1)) / function(s) return "l==" .. s end) - +local lp_attribute = (P("@") + P("attribute::")) / "" * Cc("(ll.at and ll.at['") * R("az","AZ","--","__")^1 * Cc("'])") +local lp_fastpos = ((R("09","--","++")^1 * P(-1)) / function(s) return "l==" .. s end) local lp_reserved = C("and") + C("or") + C("not") + C("div") + C("mod") + C("true") + C("false") local lp_lua_function = C(R("az","AZ","__")^1 * (P(".") * R("az","AZ","__")^1)^1) * ("(") / function(t) -- todo: better . handling @@ -4797,9 +4849,9 @@ local noparent = 1 - (lparent+rparent) local nested = lpeg.P{lparent * (noparent + lpeg.V(1))^0 * rparent} local value = lpeg.P(lparent * lpeg.C((noparent + nested)^0) * rparent) -- lpeg.P{"("*C(((1-S("()"))+V(1))^0)*")"} -local lp_child = Cc("expr.child(e,'") * R("az","AZ","--","__")^1 * Cc("')") -local lp_string = Cc("'") * R("az","AZ","--","__")^1 * Cc("'") -local lp_content= (P("'") * (1-P("'"))^0 * P("'") + P('"') * (1-P('"'))^0 * P('"')) +local lp_child = Cc("expr.child(e,'") * R("az","AZ","--","__")^1 * Cc("')") +local lp_string = Cc("'") * R("az","AZ","--","__")^1 * Cc("'") +local lp_content = (P("'") * (1-P("'"))^0 * P("'") + P('"') * (1-P('"'))^0 * P('"')) local cleaner @@ -4941,7 +4993,9 @@ local parser = Ct { "patterns", -- can be made a bit faster by moving pattern ou protocol = Cg(V("letters"),"protocol") * P("://") + Cg(Cc(nil),"protocol"), - step = (V("shortcuts") + V("axis") * spaces * V("nodes")^0 + V("error")) * spaces * V("expressions")^0 * spaces * V("finalizer")^0, + -- the / is needed for // as descendant or self is somewhat special + -- step = (V("shortcuts") + V("axis") * spaces * V("nodes")^0 + V("error")) * spaces * V("expressions")^0 * spaces * V("finalizer")^0, + step = ((V("shortcuts") + P("/") + V("axis")) * spaces * V("nodes")^0 + V("error")) * spaces * V("expressions")^0 * spaces * V("finalizer")^0, axis = V("descendant") + V("child") + V("parent") + V("self") + V("root") + V("ancestor") + V("descendant_or_self") + V("following") + V("following_sibling") + @@ -4956,13 +5010,14 @@ local parser = Ct { "patterns", -- can be made a bit faster by moving pattern ou shortcuts = V("shortcuts_a") * (spaces * "/" * spaces * V("shortcuts_a"))^0, - s_descendant_or_self = P("/") * Cc(register_descendant_or_self), - s_descendant = P("**") * Cc(register_descendant), - s_child = P("*") * Cc(register_child ), - s_parent = P("..") * Cc(register_parent ), - s_self = P("." ) * Cc(register_self ), - s_root = P("^^") * Cc(register_root ), - s_ancestor = P("^") * Cc(register_ancestor ), + s_descendant_or_self = (P("***/") + P("/")) * Cc(register_descendant_or_self), --- *** is a bonus + -- s_descendant_or_self = P("/") * Cc(register_descendant_or_self), + s_descendant = P("**") * Cc(register_descendant), + s_child = P("*") * #(1-P(":")) * Cc(register_child ), + s_parent = P("..") * Cc(register_parent ), + s_self = P("." ) * Cc(register_self ), + s_root = P("^^") * Cc(register_root ), + s_ancestor = P("^") * Cc(register_ancestor ), descendant = P("descendant::") * Cc(register_descendant ), child = P("child::") * Cc(register_child ), @@ -5154,7 +5209,7 @@ local function traced_apply(list,parsed,nofparsed) logs.report("lpath", "% 10i : ns : %s",(collected and #collected) or 0,nodesettostring(pi.nodes,pi.nodetest)) elseif kind == "expression" then collected = apply_expression(collected,pi.evaluator,i) - logs.report("lpath", "% 10i : ex : %s",(collected and #collected) or 0,pi.expression) + logs.report("lpath", "% 10i : ex : %s -> %s",(collected and #collected) or 0,pi.expression,pi.converted) elseif kind == "finalizer" then collected = pi.finalizer(collected) logs.report("lpath", "% 10i : fi : %s : %s(%s)",(collected and #collected) or 0,parsed.protocol or xml.defaultprotocol,pi.name,pi.arguments or "") @@ -6091,7 +6146,7 @@ end local function text(collected) if collected then - return xmltostring(collected[1]) -- only first as we cannot concat function + return xmltostring(collected[1].dt) -- only first as we cannot concat function else return "" end diff --git a/scripts/context/stubs/unix/mtxrun b/scripts/context/stubs/unix/mtxrun index cbb27098d..7507b5e6e 100755 --- a/scripts/context/stubs/unix/mtxrun +++ b/scripts/context/stubs/unix/mtxrun @@ -1919,6 +1919,7 @@ function file.collapse_path(str) return str end +--~ print(file.collapse_path("/a")) --~ print(file.collapse_path("a/./b/..")) --~ print(file.collapse_path("a/aa/../b/bb")) --~ print(file.collapse_path("a/../..")) @@ -3517,8 +3518,9 @@ element.</p> local nsremap, resolvens = xml.xmlns, xml.resolvens -local stack, top, dt, at, xmlns, errorstr, entities = {}, {}, {}, {}, {}, nil, {} +local stack, top, dt, at, xmlns, errorstr, entities = { }, { }, { }, { }, { }, nil, { } local strip, cleanup, utfize, resolve = false, false, false, false +local dcache, hcache, acache = { }, { }, { } local mt = { } @@ -3638,8 +3640,6 @@ local function attribute_specification_error(str) return str end -local dcache, hcache, acache = { }, { }, { } - function xml.unknown_dec_entity_format(str) return format("&%s;", str) end function xml.unknown_hex_entity_format(str) return format("&#x%s;",str) end function xml.unknown_any_entity_format(str) return format("&%s;", str) end @@ -3659,12 +3659,13 @@ local function handle_hex_entity(str) if trace_entities then logs.report("xml","found entity &#x%s;",str) end - h = "&#" .. str .. ";" + h = "&#c" .. str .. ";" end hcache[str] = h end return h end + local function handle_dec_entity(str) local d = dcache[str] if not d then @@ -3680,30 +3681,77 @@ local function handle_dec_entity(str) if trace_entities then logs.report("xml","found entity &#%s;",str) end - d = "&" .. str .. ";" + d = "&#" .. str .. ";" end dcache[str] = d end return d end + +-- one level expansion (simple case) + +local function fromhex(s) + local n = tonumber(s,16) + if n then + return utfchar(n) + else + return format("h:%s",s), true + end +end + +local function fromdec(s) + local n = tonumber(s) + if n then + return utfchar(n) + else + return format("d:%s",s), true + end +end + +local P, S, R, C, V, Cs = lpeg.P, lpeg.S, lpeg.R, lpeg.C, lpeg.V, lpeg.Cs + +local rest = (1-P(";"))^0 +local many = P(1)^0 + +local parsedentity = + P("&") * (P("#x")*(rest/fromhex) + P("#")*(rest/fromdec)) * P(";") * P(-1) + + (P("#x")*(many/fromhex) + P("#")*(many/fromdec)) + +xml.parsedentitylpeg = parsedentity + local function handle_any_entity(str) if resolve then - local a = entities[str] -- per instance ! + local a = acache[str] -- per instance ! todo if not a then - a = acache[str] - if not a then + if type(resolve) == "function" then + a = resolve(str) or entities[str] + else + a = entities[str] + end + if a then if trace_entities then - logs.report("xml","ignoring entity &%s;",str) + logs.report("xml","resolved entity &%s; -> %s (internal)",str,a) + end + a = parsedentity:match(a) or a + else + if xml.unknown_any_entity_format then + a = xml.unknown_any_entity_format(str) or "" + end + if a then + if trace_entities then + logs.report("xml","resolved entity &%s; -> %s (external)",str,a) + end else - -- can be defined in a global mapper and intercepted elsewhere - -- as happens in lxml-tex.lua + if trace_entities then + logs.report("xml","keeping entity &%s;",str) + end + a = "&" .. str .. ";" end - a = xml.unknown_any_entity_format(str) or "" - acache[str] = a end + acache[str] = a elseif trace_entities then if not acache[str] then - logs.report("xml","converting entity &%s; into %s",str,r) + logs.report("xml","converting entity &%s; into %s",str,a) acache[str] = a end end @@ -3721,8 +3769,6 @@ local function handle_any_entity(str) end end -local P, S, R, C, V, Cs = lpeg.P, lpeg.S, lpeg.R, lpeg.C, lpeg.V, lpeg.Cs - local space = S(' \r\n\t') local open = P('<') local close = P('>') @@ -3744,12 +3790,11 @@ local utfbom = P('\000\000\254\255') + P('\255\254\000\000') + local spacing = C(space^0) local entitycontent = (1-open-semicolon)^0 -local entity = ampersand/"" * ( - P("#")/"" * ( +local parsedentity = P("#")/"" * ( P("x")/"" * (entitycontent/handle_hex_entity) + (entitycontent/handle_dec_entity) ) + (entitycontent/handle_any_entity) - ) * (semicolon/"") +local entity = ampersand/"" * parsedentity * (semicolon/"") local text_unparsed = C((1-open)^1) local text_parsed = Cs(((1-open-ampersand)^1 + entity)^1) @@ -3848,7 +3893,8 @@ local function xmlconvert(data, settings) utfize = settings.utfize_entities resolve = settings.resolve_entities cleanup = settings.text_cleanup - stack, top, at, xmlns, errorstr, result, entities = {}, {}, {}, {}, nil, nil, settings.entities or {} + stack, top, at, xmlns, errorstr, result, entities = { }, { }, { }, { }, nil, nil, settings.entities or { } + acache, hcache, dcache = { }, { }, { } -- not stored reported_attribute_errors = { } if settings.parent_root then mt = getmetatable(settings.parent_root) @@ -4572,9 +4618,9 @@ apply_axis['descendant-or-self'] = function(list) local collected = { } for l=1,#list do local ll = list[l] -if ll.special ~= true then -- catch double root - collected[#collected+1] = ll -end + if ll.special ~= true then -- catch double root + collected[#collected+1] = ll + end collect(ll,collected) end return collected @@ -4663,7 +4709,7 @@ local function apply_nodes(list,directive,nodes) return { } end else - local collected = { } + local collected, m, p = { }, 0, nil if not nns then -- only check tag for l=1,#list do local ll = list[l] @@ -4671,10 +4717,12 @@ local function apply_nodes(list,directive,nodes) if ltg then if directive then if ntg == ltg then - collected[#collected+1] = ll + local llp = ll.__p__ ; if llp ~= p then p, m = llp, 1 else m = m + 1 end + collected[#collected+1], ll.mi = ll, m end elseif ntg ~= ltg then - collected[#collected+1] = ll + local llp = ll.__p__ ; if llp ~= p then p, m = llp, 1 else m = m + 1 end + collected[#collected+1], ll.mi = ll, m end end end @@ -4685,10 +4733,12 @@ local function apply_nodes(list,directive,nodes) if lns then if directive then if lns == nns then - collected[#collected+1] = ll + local llp = ll.__p__ ; if llp ~= p then p, m = llp, 1 else m = m + 1 end + collected[#collected+1], ll.mi = ll, m end elseif lns ~= nns then - collected[#collected+1] = ll + local llp = ll.__p__ ; if llp ~= p then p, m = llp, 1 else m = m + 1 end + collected[#collected+1], ll.mi = ll, m end end end @@ -4701,10 +4751,12 @@ local function apply_nodes(list,directive,nodes) local ok = ltg == ntg and lns == nns if directive then if ok then - collected[#collected+1] = ll + local llp = ll.__p__ ; if llp ~= p then p, m = llp, 1 else m = m + 1 end + collected[#collected+1], ll.mi = ll, m end elseif not ok then - collected[#collected+1] = ll + local llp = ll.__p__ ; if llp ~= p then p, m = llp, 1 else m = m + 1 end + collected[#collected+1], ll.mi = ll, m end end end @@ -4712,7 +4764,7 @@ local function apply_nodes(list,directive,nodes) return collected end else - local collected = { } + local collected, m, p = { }, 0, nil for l=1,#list do local ll = list[l] local ltg = ll.tg @@ -4728,10 +4780,12 @@ local function apply_nodes(list,directive,nodes) end if directive then if ok then - collected[#collected+1] = ll + local llp = ll.__p__ ; if llp ~= p then p, m = llp, 1 else m = m + 1 end + collected[#collected+1], ll.mi = ll, m end elseif not ok then - collected[#collected+1] = ll + local llp = ll.__p__ ; if llp ~= p then p, m = llp, 1 else m = m + 1 end + collected[#collected+1], ll.mi = ll, m end end end @@ -4752,31 +4806,29 @@ end local P, V, C, Cs, Cc, Ct, R, S, Cg, Cb = lpeg.P, lpeg.V, lpeg.C, lpeg.Cs, lpeg.Cc, lpeg.Ct, lpeg.R, lpeg.S, lpeg.Cg, lpeg.Cb -local spaces = S(" \n\r\t\f")^0 - -local lp_space = S(" \n\r\t\f") -local lp_any = P(1) - -local lp_noequal = P("!=") / "~=" + P("<=") + P(">=") + P("==") -local lp_doequal = P("=") / "==" -local lp_or = P("|") / " or " -local lp_and = P("&") / " and " +local spaces = S(" \n\r\t\f")^0 +local lp_space = S(" \n\r\t\f") +local lp_any = P(1) +local lp_noequal = P("!=") / "~=" + P("<=") + P(">=") + P("==") +local lp_doequal = P("=") / "==" +local lp_or = P("|") / " or " +local lp_and = P("&") / " and " local lp_builtin = P ( P("first") / "1" + P("last") / "#list" + P("position") / "l" + P("rootposition") / "order" + - P("index") / "ll.ni" + + P("index") / "(ll.ni or 1)" + + P("match") / "(ll.mi or 1)" + P("text") / "(ll.dt[1] or '')" + P("name") / "(ll.ns~='' and ll.ns..':'..ll.tg)" + P("tag") / "ll.tg" + P("ns") / "ll.ns" ) * ((spaces * P("(") * spaces * P(")"))/"") -local lp_attribute = (P("@") + P("attribute::")) / "" * Cc("(ll.at and ll.at['") * R("az","AZ","--","__")^1 * Cc("'])") -local lp_fastpos = ((R("09","--","++")^1 * P(-1)) / function(s) return "l==" .. s end) - +local lp_attribute = (P("@") + P("attribute::")) / "" * Cc("(ll.at and ll.at['") * R("az","AZ","--","__")^1 * Cc("'])") +local lp_fastpos = ((R("09","--","++")^1 * P(-1)) / function(s) return "l==" .. s end) local lp_reserved = C("and") + C("or") + C("not") + C("div") + C("mod") + C("true") + C("false") local lp_lua_function = C(R("az","AZ","__")^1 * (P(".") * R("az","AZ","__")^1)^1) * ("(") / function(t) -- todo: better . handling @@ -4797,9 +4849,9 @@ local noparent = 1 - (lparent+rparent) local nested = lpeg.P{lparent * (noparent + lpeg.V(1))^0 * rparent} local value = lpeg.P(lparent * lpeg.C((noparent + nested)^0) * rparent) -- lpeg.P{"("*C(((1-S("()"))+V(1))^0)*")"} -local lp_child = Cc("expr.child(e,'") * R("az","AZ","--","__")^1 * Cc("')") -local lp_string = Cc("'") * R("az","AZ","--","__")^1 * Cc("'") -local lp_content= (P("'") * (1-P("'"))^0 * P("'") + P('"') * (1-P('"'))^0 * P('"')) +local lp_child = Cc("expr.child(e,'") * R("az","AZ","--","__")^1 * Cc("')") +local lp_string = Cc("'") * R("az","AZ","--","__")^1 * Cc("'") +local lp_content = (P("'") * (1-P("'"))^0 * P("'") + P('"') * (1-P('"'))^0 * P('"')) local cleaner @@ -4941,7 +4993,9 @@ local parser = Ct { "patterns", -- can be made a bit faster by moving pattern ou protocol = Cg(V("letters"),"protocol") * P("://") + Cg(Cc(nil),"protocol"), - step = (V("shortcuts") + V("axis") * spaces * V("nodes")^0 + V("error")) * spaces * V("expressions")^0 * spaces * V("finalizer")^0, + -- the / is needed for // as descendant or self is somewhat special + -- step = (V("shortcuts") + V("axis") * spaces * V("nodes")^0 + V("error")) * spaces * V("expressions")^0 * spaces * V("finalizer")^0, + step = ((V("shortcuts") + P("/") + V("axis")) * spaces * V("nodes")^0 + V("error")) * spaces * V("expressions")^0 * spaces * V("finalizer")^0, axis = V("descendant") + V("child") + V("parent") + V("self") + V("root") + V("ancestor") + V("descendant_or_self") + V("following") + V("following_sibling") + @@ -4956,13 +5010,14 @@ local parser = Ct { "patterns", -- can be made a bit faster by moving pattern ou shortcuts = V("shortcuts_a") * (spaces * "/" * spaces * V("shortcuts_a"))^0, - s_descendant_or_self = P("/") * Cc(register_descendant_or_self), - s_descendant = P("**") * Cc(register_descendant), - s_child = P("*") * Cc(register_child ), - s_parent = P("..") * Cc(register_parent ), - s_self = P("." ) * Cc(register_self ), - s_root = P("^^") * Cc(register_root ), - s_ancestor = P("^") * Cc(register_ancestor ), + s_descendant_or_self = (P("***/") + P("/")) * Cc(register_descendant_or_self), --- *** is a bonus + -- s_descendant_or_self = P("/") * Cc(register_descendant_or_self), + s_descendant = P("**") * Cc(register_descendant), + s_child = P("*") * #(1-P(":")) * Cc(register_child ), + s_parent = P("..") * Cc(register_parent ), + s_self = P("." ) * Cc(register_self ), + s_root = P("^^") * Cc(register_root ), + s_ancestor = P("^") * Cc(register_ancestor ), descendant = P("descendant::") * Cc(register_descendant ), child = P("child::") * Cc(register_child ), @@ -5154,7 +5209,7 @@ local function traced_apply(list,parsed,nofparsed) logs.report("lpath", "% 10i : ns : %s",(collected and #collected) or 0,nodesettostring(pi.nodes,pi.nodetest)) elseif kind == "expression" then collected = apply_expression(collected,pi.evaluator,i) - logs.report("lpath", "% 10i : ex : %s",(collected and #collected) or 0,pi.expression) + logs.report("lpath", "% 10i : ex : %s -> %s",(collected and #collected) or 0,pi.expression,pi.converted) elseif kind == "finalizer" then collected = pi.finalizer(collected) logs.report("lpath", "% 10i : fi : %s : %s(%s)",(collected and #collected) or 0,parsed.protocol or xml.defaultprotocol,pi.name,pi.arguments or "") @@ -6091,7 +6146,7 @@ end local function text(collected) if collected then - return xmltostring(collected[1]) -- only first as we cannot concat function + return xmltostring(collected[1].dt) -- only first as we cannot concat function else return "" end |