diff options
author | Marius <mariausol@gmail.com> | 2011-06-08 20:40:24 +0300 |
---|---|---|
committer | Marius <mariausol@gmail.com> | 2011-06-08 20:40:24 +0300 |
commit | e5fe861660d5cf60cfeb67f7e57f659b309e9613 (patch) | |
tree | f5bc4ea0cd61f18c096f0fd5df8cf439700ba71e /tex/context/base/back-exp.lua | |
parent | b17f22aa285224dcf5b1dbccd795bc73b9a16426 (diff) | |
download | context-e5fe861660d5cf60cfeb67f7e57f659b309e9613.tar.gz |
beta 2011.06.08 19:06
Diffstat (limited to 'tex/context/base/back-exp.lua')
-rw-r--r-- | tex/context/base/back-exp.lua | 2001 |
1 files changed, 1329 insertions, 672 deletions
diff --git a/tex/context/base/back-exp.lua b/tex/context/base/back-exp.lua index 7f52223a1..7e27bd259 100644 --- a/tex/context/base/back-exp.lua +++ b/tex/context/base/back-exp.lua @@ -12,14 +12,18 @@ if not modules then modules = { } end modules ['back-exp'] = { -- Because we need to look ahead we now always build a tree (this was optional in -- the beginning). The extra overhead in the frontend is neglectable. --- We can consider replacing attributes by the hash entry ... slower in resolving but it's still --- quite okay. +-- We can consider replacing attributes by the hash entry ... slower +-- in resolving but it's still quite okay. -- todo: less attributes e.g. internal only first node -- todo: build xml tree in mem (handy for cleaning) -- delimited: left/right string (needs marking) +-- we can optimize the code ... currently the overhead is some 10% for xml + html + +-- option: pack strings each page so that we save memory + local nodecodes = nodes.nodecodes local traverse_nodes = node.traverse local hlist_code = nodecodes.hlist @@ -46,14 +50,13 @@ nodes.locate = locate local next, type = next, type local format, match, concat, rep, sub, gsub, gmatch = string.format, string.match, table.concat, string.rep, string.sub, string.gsub, string.gmatch local lpegmatch = lpeg.match -local utfchar, utfsub = utf.char, utf.sub +local utfchar, utfbyte, utfsub, utfgsub = utf.char, utf.byte, utf.sub, utf.gsub local insert, remove = table.insert, table.remove local trace_export = false trackers.register ("structures.export", function(v) trace_export = v end) -local trace_spaces = false trackers.register ("structures.export.spaces", function(v) trace_spaces = v end) -local trace_tree = false trackers.register ("structures.export.showtree", function(v) trace_tree = v end) local less_state = false directives.register("structures.export.lessstate", function(v) less_state = v end) local page_breaks = false directives.register("structures.export.pagebreaks", function(v) page_breaks 
= v end) +local show_comment = true directives.register("structures.export.comment", function(v) show_comment = v end) local report_export = logs.reporter("backend","export") @@ -86,9 +89,14 @@ local refximage_code = whatsitcodes.pdfrefximage local userskip_code = skipcodes.userskip local rightskip_code = skipcodes.rightskip local parfillskip_code = skipcodes.parfillskip +local spaceskip_code = skipcodes.spaceskip +local xspaceskip_code = skipcodes.xspaceskip local line_code = listcodes.line +local a_characters = attributes.private('characters') +local a_exportstatus = attributes.private('exportstatus') + local a_tagged = attributes.private('tagged') local a_image = attributes.private('image') @@ -100,10 +108,16 @@ local a_taggedpacked = attributes.private("taggedpacked") local a_taggedsymbol = attributes.private("taggedsymbol") local a_taggedinsert = attributes.private("taggedinsert") local a_taggedtag = attributes.private("taggedtag") +local a_mathcategory = attributes.private("mathcategory") +local a_mathmode = attributes.private("mathmode") local a_reference = attributes.private('reference') +local a_textblock = attributes.private("textblock") + local has_attribute = node.has_attribute +local set_attribute = node.set_attribute +local traverse_id = node.traverse_id local traverse_nodes = node.traverse local slide_nodelist = node.slide local texattribute = tex.attribute @@ -116,32 +130,48 @@ local taglist = structurestags.taglist local properties = structurestags.properties local userdata = structurestags.userdata -- might be combines with taglist local tagdata = structurestags.data +local tagmetadata = structurestags.metadata local starttiming = statistics.starttiming local stoptiming = statistics.stoptiming -- todo: more locals (and optimize) -local version = "0.20" -local result = nil -- todo: nofresult -local entry = nil -local attributehash = { } -local hyphen = utfchar(0xAD) -- todo: also emdash etc -local colonsplitter = lpeg.splitat(":") -local 
dashsplitter = lpeg.splitat("-") -local threshold = 65536 -local indexing = false -local linedone = false -local inlinedepth = 0 -local tree = { data = { }, depth = 0 } -- root -local treestack = { } -local treehash = { } -local extras = { } -local nofbreaks = 0 -local used = { } -local exporting = false -local last = nil -local lastpar = nil +local exportversion = "0.22" + +local nofcurrentcontent = 0 -- so we don't free (less garbage collection) +local currentcontent = { } +local currentnesting = nil +local currentattribute = nil +local last = nil +local currentparagraph = nil + +local noftextblocks = 0 + +local attributehash = { } -- to be considered: set the values at the tex end +local hyphen = utfchar(0xAD) -- todo: also emdash etc +local colonsplitter = lpeg.splitat(":") +local dashsplitter = lpeg.splitat("-") +local threshold = 65536 +local indexing = false + +local treestack = { } +local nesting = { } +local currentdepth = 0 + +local tree = { data = { }, depth = 0, fulltag == "root" } -- root +local treeroot = tree +local treehash = { } +local extras = { } +local nofbreaks = 0 +local used = { } +local exporting = false +local restart = false +local specialspaces = { [0x20] = " " } -- for conversion +local somespace = { [0x20] = true, [" "] = true } -- for testing +local entities = { ["&"] = "&", [">"] = ">", ["<"] = "<" } + +local defaultnature = "mixed" -- "inline" setmetatableindex(used, function(t,k) if k then @@ -151,16 +181,15 @@ setmetatableindex(used, function(t,k) end end) -local joiner_1 = " " -local joiner_2 = " " -- todo: test if this one can always be "" -local joiner_3 = " " -local joiner_4 = " " -local joiner_5 = " " -local joiner_6 = " " -local joiner_7 = "\n" -local joiner_8 = " " -local joiner_9 = " " -local joiner_0 = " " +setmetatableindex(specialspaces, function(t,k) + local v = utfchar(k) + t[k] = v + entities[v] = format("&#%X;",k) + somespace[k] = true + somespace[v] = true + return v +end) + local namespaced = { -- filled on @@ 
-185,13 +214,19 @@ local namespaces = { merror = "m", math = "m", mrow = "m", + mtable = "m", + mtr = "m", + mtd = "m", + mfenced = "m", } setmetatableindex(namespaced, function(t,k) - local namespace = namespaces[k] - local v = namespace and namespace .. ":" .. k or k - t[k] = v - return v + if k then + local namespace = namespaces[k] + local v = namespace and namespace .. ":" .. k or k + t[k] = v + return v + end end) -- local P, C, Cc = lpeg.P, lpeg.C, lpeg.Cc @@ -218,24 +253,45 @@ end local spaces = { } -- watch how we also moved the -1 in depth-1 to the creator -setmetatableindex(spaces, function(t,k) t[k] = rep(" ",k-1) return t[k] end) +setmetatableindex(spaces, function(t,k) local s = rep(" ",k-1) t[k] = s return s end) + +function structurestags.setattributehash(fulltag,key,value) + if type(fulltag) == "number" then + fulltag = taglist[fulltag] + if fulltag then + fulltag = fulltag[#fulltag] + end + end + if fulltag then + local ah = attributehash[fulltag] -- could be metatable magic + if not ah then + ah = { } + attributehash[fulltag] = ah + end + ah[key] = value + end +end properties.vspace = { export = "break", nature = "display" } properties.pbreak = { export = "pagebreak", nature = "display" } -local function makebreak(entry) +local function makebreaklist(list) nofbreaks = nofbreaks + 1 - local t, tl = { }, entry[1] - if tl then - for i=1,#tl do - t[i] = tl[i] + local t = { } + if list then + for i=1,#list do + t[i] = list[i] end end - t[#t+1] = "break-" .. nofbreaks - return { t, { "" }, 0, 0 } + t[#t+1] = "break-" .. 
nofbreaks -- maybe no number + return t end -local function makebreaknode(node) +local breakattributes = { + type = "collapse" +} + +local function makebreaknode(node) -- maybe no fulltag nofbreaks = nofbreaks + 1 return { tg = "break", @@ -244,31 +300,49 @@ local function makebreaknode(node) depth = node.depth, element = "break", nature = "display", - data = { }, - attribute = { } , - parnumber = 0, + -- attributes = breakattributes, + -- data = { }, -- not needed + -- attribute = 0, -- not needed + -- parnumber = 0, } end local fields = { "title", "subtitle", "author", "keywords" } -function extras.document(handle,element,detail,n,fulltag,hash) - handle:write(format(" language=%q",languagenames[tex.count.mainlanguagenumber])) +local function checkdocument(root) + local data = root.data + if data then + for i=1,#data do + local di = data[i] + if type(di) == "table" then + if di.tg == "ignore" then + di.element = "" + else + checkdocument(di) + end + end + end + end +end + +function extras.document(result,element,detail,n,fulltag,di) + result[#result+1] = format(" language=%q",languagenames[tex.count.mainlanguagenumber]) if not less_state then - handle:write(format(" file=%q",tex.jobname)) - handle:write(format(" date=%q",os.date())) - handle:write(format(" context=%q",environment.version)) - handle:write(format(" version=%q",version)) - handle:write(format(" xmlns:m=%q","http://www.w3.org/1998/Math/MathML")) + result[#result+1] = format(" file=%q",tex.jobname) + result[#result+1] = format(" date=%q",os.date()) + result[#result+1] = format(" context=%q",environment.version) + result[#result+1] = format(" version=%q",exportversion) + result[#result+1] = format(" xmlns:m=%q","http://www.w3.org/1998/Math/MathML") local identity = interactions.general.getidentity() for i=1,#fields do local key = fields[i] local value = identity[key] if value and value ~= "" then - handle:write(format(" %s=%q",key,value)) + result[#result+1] = format(" %s=%q",key,value) end end end + 
checkdocument(di) end local snames, snumbers = { }, { } @@ -319,51 +393,51 @@ function structurestags.setdescriptionid(tag,n) end end -function extras.descriptiontag(handle,element,detail,n,fulltag,di) +function extras.descriptiontag(result,element,detail,n,fulltag,di) local hash = attributehash[fulltag] if hash then local v = hash.insert v = v and insertids[v] if v then - handle:write(" insert='",v,"'") + result[#result+1] = format(" insert='%s'",v) end end end -function extras.descriptionsymbol(handle,element,detail,n,fulltag,di) +function extras.descriptionsymbol(result,element,detail,n,fulltag,di) local hash = attributehash[fulltag] if hash then local v = hash.insert v = v and insertids[v] if v then - handle:write(" insert='",v,"'") + result[#result+1] = format(" insert='%s'",v) end end end -function extras.synonym(handle,element,detail,n,fulltag,di) +function extras.synonym(result,element,detail,n,fulltag,di) local hash = attributehash[fulltag] if hash then local v = hash.tag v = v and synonymnames[v] if v then - handle:write(" tag='",v,"'") + result[#result+1] = format(" tag='%s'",v) end end end -function extras.sorting(handle,element,detail,n,fulltag,di) +function extras.sorting(result,element,detail,n,fulltag,di) local hash = attributehash[fulltag] if hash then local v = hash.tag v = v and sortingnames[v] if v then - handle:write(" tag='",v,"'") + result[#result+1] = format(" tag='%s'",v) end end end -function extras.image(handle,element,detail,n,fulltag,di) +function extras.image(result,element,detail,n,fulltag,di) local hash = attributehash[fulltag] if hash then local v = hash.imageindex @@ -375,13 +449,13 @@ function extras.image(handle,element,detail,n,fulltag,di) local path = file.dirname(fullname) local page = figure.page or 1 if name ~= "" then - handle:write(" name='",name,"'") + result[#result+1] = format(" name='%s'",name) end if path ~= "" then - handle:write(" path='",path,"'") + result[#result+1] = format(" path='%s'",path) end if page > 1 then 
- handle:write(" page='",page,"'") + result[#result+1] = format(" page='%s'",page) end end end @@ -393,37 +467,37 @@ end local evaluators = { } local specials = { } -evaluators.inner = function(handle,var) +evaluators.inner = function(result,var) local inner = var.inner - if var.inner then - handle:write(" location='",inner,"'") + if inner then + result[#result+1] = format(" location='%s'",inner) end end -evaluators.outer = function(handle,var) +evaluators.outer = function(result,var) local file, url = references.checkedfileorurl(var.outer,var.outer) if url then - handle:write(" url='",url,"'") + result[#result+1] = format(" url='%s'",url) elseif file then - handle:write(" file='",file,"'") + result[#result+1] = format(" file='%s'",file) end end -evaluators["outer with inner"] = function(handle,var) +evaluators["outer with inner"] = function(result,var) local file = references.checkedfile(var.f) if file then - handle:write(" file='",file,"'") + result[#result+1] = format(" file='%s'",file) end local inner = var.inner - if var.inner then - handle:write(" location='",inner,"'") + if inner then + result[#result+1] = format(" location='%s'",inner) end end -evaluators.special = function(handle,var) +evaluators.special = function(result,var) local handler = specials[var.special] if handler then - handler(handle,var) + handler(result,var) end end @@ -431,80 +505,80 @@ evaluators["special outer with operation"] = evaluators.special evaluators["special operation"] = evaluators.special evaluators["special operation with arguments"] = evaluators.special -function specials.url(handle,var) +function specials.url(result,var) local url = references.checkedurl(var.operation) if url then - handle:write(" url='",url,"'") + result[#result+1] = format(" url='%s'",url) end end -function specials.file(handle,var) +function specials.file(result,var) local file = references.checkedfile(var.operation) if file then - handle:write(" file='",file,"'") + result[#result+1] = format(" 
file='%s'",file) end end -function specials.fileorurl(handle,var) +function specials.fileorurl(result,var) local file, url = references.checkedfileorurl(var.operation,var.operation) if url then - handle:write(" url='",url,"'") + result[#result+1] = format(" url='%s'",url) elseif file then - handle:write(" file='",file,"'") + result[#result+1] = format(" file='%s'",file) end end -function specials.internal(handle,var) +function specials.internal(result,var) local internal = references.checkedurl(var.operation) if internal then - handle:write(" location='aut:",internal,"'") + result[#result+1] = format(" location='aut:%s'",internal) end end -local function adddestination(handle,references) -- todo: specials -> exporters and then concat +local function adddestination(result,references) -- todo: specials -> exporters and then concat if references then local reference = references.reference if reference and reference ~= "" then local prefix = references.prefix if prefix and prefix ~= "" then - handle:write(" prefix='",prefix,"'") + result[#result+1] = format(" prefix='%s'",prefix) end - handle:write(" destination='",reference,"'") + result[#result+1] = format(" destination='%s'",reference) for i=1,#references do local r = references[i] local e = evaluators[r.kind] if e then - e(handle,r) + e(result,r) end end end end end -local function addreference(handle,references) +local function addreference(result,references) if references then local reference = references.reference if reference and reference ~= "" then local prefix = references.prefix if prefix and prefix ~= "" then - handle:write(" prefix='",prefix,"'") + result[#result+1] = format(" prefix='%s'",prefix) end - handle:write(" reference='",reference,"'") + result[#result+1] = format(" reference='%s'",reference) end local internal = references.internal if internal and internal ~= "" then - handle:write(" location='aut:",internal,"'") + result[#result+1] = format(" location='aut:%s'",internal) end end end -function 
extras.link(handle,element,detail,n,fulltag,di) +function extras.link(result,element,detail,n,fulltag,di) -- for instance in lists a link has nested elements and no own text local hash = attributehash[fulltag] if hash then local references = hash.reference if references then - adddestination(handle,structures.references.get(references)) + adddestination(result,structures.references.get(references)) end return true else @@ -512,7 +586,7 @@ function extras.link(handle,element,detail,n,fulltag,di) if data then for i=1,#data do local di = data[i] - if di and extras.link(handle,element,detail,n,di.fulltag,di) then + if di and extras.link(result,element,detail,n,di.fulltag,di) then return true end end @@ -520,10 +594,297 @@ function extras.link(handle,element,detail,n,fulltag,di) end end -function extras.section(handle,element,detail,n,fulltag,di) +local automathrows = true directives.register("backend.export.math.autorows", function(v) automathrows = v end) +local automathapply = true directives.register("backend.export.math.autoapply", function(v) automathapply = v end) +local automathnumber = true directives.register("backend.export.math.autonumber", function(v) automathnumber = v end) +local automathstrip = true directives.register("backend.export.math.autostrip", function(v) automathstrip = v end) + +local functions = mathematics.categories.functions + +local function collapse(di,i,data,ndata,detail,element) + local collapsing = di.data + if data then + di.element = element + di.detail = nil + i = i + 1 + while i <= ndata do + local dn = data[i] + if dn.detail == detail then + collapsing[#collapsing+1] = dn.data[1] + dn.skip = "ignore" + i = i + 1 + else + break + end + end + end + return i +end + +local function collapse_mn(di,i,data,ndata) + local collapsing = di.data + if data then + i = i + 1 + while i <= ndata do + local dn = data[i] + local tg = dn.tg + if tg == "mn" then + collapsing[#collapsing+1] = dn.data[1] + dn.skip = "ignore" + i = i + 1 + elseif tg == 
"mo" then + local d = dn.data[1] + if d == "." then + collapsing[#collapsing+1] = d + dn.skip = "ignore" + i = i + 1 + else + break + end + else + break + end + end + end + return i +end + +-- maybe delay __i__ till we need it + +local function checkmath(root) -- we can provide utf.toentities as an option + local data = root.data + if data then + local ndata = #data + local roottg = root.tg + if roottg == "msubsup" then + local nucleus, superscript, subscript + for i=1,ndata do + if type(data[i]) == "table" then + if not nucleus then + nucleus = i + elseif not superscript then + superscript = i + elseif not subscript then + subscript = i + else + -- error + end + end + end + if superscript and subscript then + local sup, sub = data[superscript], data[subscript] + data[superscript], data[subscript] = sub, sup + -- sub.__o__, sup.__o__ = subscript, superscript + sub.__i__, sup.__i__ = superscript, subscript + end + elseif roottg == "mfenced" then + local new, n = { }, 0 + local attributes = { } + root.attributes = attributes + for i=1,ndata do + local di = data[i] + if type(di) == "table" then + local tg = di.tg + if tg == "mleft" then + attributes.left = tostring(di.data[1].data[1]) + elseif tg == "mmiddle" then + attributes.middle = tostring(di.data[1].data[1]) + elseif tg == "mright" then + attributes.right = tostring(di.data[1].data[1]) + else + n = n + 1 + di.__i__ = n + new[n] = di + end + else + n = n + 1 + new[n] = di + end + end + root.data = new + ndata = n + end + if ndata == 0 then + return + elseif ndata == 1 then + local d = data[1] + if type(d) ~= "table" then + return -- can be string or false + elseif #root.data == 1 then + local tg = d.tg + if automathrows and roottg == "mrow" then + if tg == "mrow" or tg == "mfenced" or tg == "mfrac" or tg == "mroot" then + root.skip = "comment" + end + elseif roottg == "mo" then + if tg == "mo" then + root.skip = "comment" + end + end + end + end + local i = 1 + while i <= ndata do -- -- -- TOO MUCH NESTED 
CHECKING -- -- -- + local di = data[i] + if di and type(di) == "table" then + local tg = di.tg + local detail = di.detail + if tg == "math" then + -- di.element = "mrow" -- when properties + di.skip = "comment" + checkmath(di) + i = i + 1 + elseif tg == "mover" or tg == "munder" or tg == "munderover" then + if detail == "accent" then + di.attributes = { accent = "true" } + di.detail = nil + end + checkmath(di) + i = i + 1 + elseif tg == "break" then + di.skip = "comment" + i = i + 1 + elseif detail then + -- no checkmath(di) here + local category = tonumber(detail) or 0 + if category == 1 then -- mo + i = collapse(di,i,data,ndata,detail,"mo") + elseif category == 2 then -- mi + i = collapse(di,i,data,ndata,detail,"mi") + elseif category == 3 then -- mn + i = collapse(di,i,data,ndata,detail,"mn") + elseif category == 4 then -- ms + i = collapse(di,i,data,ndata,detail,"ms") + elseif category >= 1000 then + local apply = category >= 2000 + if apply then + category = category - 1000 + end + if tg == "mi" then -- function + if root.tg == "mrow" then + root.skip = "comment" + root.element = "function" + end + i = collapse(di,i,data,ndata,detail,"mi") + local tag = functions[category] + if tag then + di.data = { tag } + end + if apply then + di.after = { + { + element = "mo", + -- comment = "apply function", + -- data = { utfchar(0x2061) }, + data = { "⁡" }, + nature = "mixed", + depth = di.depth, + } + } + elseif automathapply then -- make function + local following + if i <= ndata then + -- normally not the case + following = data[i] + else + local parent = di.__p__ -- == root + if parent.tg == "mrow" then + parent = parent.__p__ + end + local index = parent.__i__ + following = parent.data[index+1] + end + if following then + local tg = following.tg + if tg == "mrow" or tg == "mfenced" then -- we need to figure out the right condition + di.after = { + { + element = "mo", + -- comment = "apply function", + -- data = { utfchar(0x2061) }, + data = { "⁡" }, + nature = 
"mixed", + depth = di.depth, + } + } + end + end + end + else -- some problem + checkmath(di) + i = i + 1 + end + else + checkmath(di) + i = i + 1 + end + elseif automathnumber and tg == "mn" then + checkmath(di) + i = collapse_mn(di,i,data,ndata) + else + checkmath(di) + i = i + 1 + end + else -- can be string or boolean + if parenttg ~= "mtext" and di == " " then + data[i] = false + end + i = i + 1 + end + end + end +end + +function stripmath(di) + local tg = di.tg + if tg == "mtext" or tg == "ms" then + return di + else + local data = di.data + local ndata = #data + local n = 0 + for i=1,ndata do + local di = data[i] + if type(di) == "table" then + di = stripmath(di) + end + if not di or di == " " or di == "" then + -- skip + elseif type(di) == "table" then + n = n + 1 + di.__i__ = n + data[n] = di + else + n = n + 1 + data[n] = di + end + end + for i=ndata,n+1,-1 do + data[i] = nil + end + if #data > 0 then + return di + end + end +end + +function extras.math(result,element,detail,n,fulltag,di) + if di then + local hash = attributehash[di.fulltag] + di.attributes = { + display = (hash and hash.mode) == "display" and "block" or "inline" + } + if automathstrip then + stripmath(di) + end + checkmath(di) + end +end + +function extras.section(result,element,detail,n,fulltag,di) local data = listdata[fulltag] if data then - addreference(handle,data.references) + addreference(result,data.references) return true else local data = di.data @@ -532,7 +893,7 @@ function extras.section(handle,element,detail,n,fulltag,di) local di = data[i] if di then local ft = di.fulltag - if ft and extras.section(handle,element,detail,n,ft,di) then + if ft and extras.section(result,element,detail,n,ft,di) then return true end end @@ -541,17 +902,17 @@ function extras.section(handle,element,detail,n,fulltag,di) end end -function extras.float(handle,element,detail,n,fulltag,di) +function extras.float(result,element,detail,n,fulltag,di) local data = listdata[fulltag] if data then - 
addreference(handle,data.references) + addreference(result,data.references) return true else local data = di.data if data then for i=1,#data do local di = data[i] - if di and extras.section(handle,element,detail,n,di.fulltag,di) then + if di and extras.section(result,element,detail,n,di.fulltag,di) then return true end end @@ -559,28 +920,30 @@ function extras.float(handle,element,detail,n,fulltag,di) end end -function extras.itemgroup(handle,element,detail,n,fulltag,di) +function extras.itemgroup(result,element,detail,n,fulltag,di) local data = di.data - for i=1,#data do - local di = data[i] - if type(di) == "table" and di.tg == "item" then - local ddata = di.data - for i=1,#ddata do - local ddi = ddata[i] - if type(ddi) == "table" then - local tg = ddi.tg - if tg == "itemtag" or tg == "itemcontent" then - local hash = attributehash[ddi.fulltag] - if hash then - local v = hash.packed - if v and v == 1 then - handle:write(" packed='yes'") - end - local v = hash.symbol - if v then - handle:write(" symbol='",snames[v],"'") + if data then + for i=1,#data do + local di = data[i] + if type(di) == "table" and di.tg == "item" then + local ddata = di.data + for i=1,#ddata do + local ddi = ddata[i] + if type(ddi) == "table" then + local tg = ddi.tg + if tg == "itemtag" or tg == "itemcontent" then + local hash = attributehash[ddi.fulltag] + if hash then + local v = hash.packed + if v and v == 1 then + result[#result+1] = " packed='yes'" + end + local v = hash.symbol + if v then + result[#result+1] = format(" symbol='%s'",snames[v]) + end + return end - return end end end @@ -589,276 +952,246 @@ function extras.itemgroup(handle,element,detail,n,fulltag,di) end end -function extras.tablecell(handle,element,detail,n,fulltag,di) +function extras.tablecell(result,element,detail,n,fulltag,di) local hash = attributehash[fulltag] if hash then local v = hash.align if not v or v == 0 then -- normal elseif v == 1 then - handle:write(" align='flushright'") + result[#result+1] = " 
align='flushright'" elseif v == 2 then - handle:write(" align='middle'") + result[#result+1] = " align='middle'" elseif v == 3 then - handle:write(" align='flushleft'") + result[#result+1] = " align='flushleft'" end local v = hash.columns if v and v > 1 then - handle:write(" columns='",v,"'") + result[#result+1] = format(" columns='%s'",v) end local v = hash.rows if v and v > 1 then - handle:write(" rows='",v,"'") + result[#result+1] = format(" rows='%s'",v) end end end -function extras.tabulatecell(handle,element,detail,n,fulltag,di) +function extras.tabulatecell(result,element,detail,n,fulltag,di) local hash = attributehash[fulltag] if hash then local v = hash.align if not v or v == 0 then -- normal elseif v == 1 then - handle:write(" align='flushright'") + result[#result+1] = " align='flushright'" elseif v == 2 then - handle:write(" align='middle'") + result[#result+1] = " align='middle'" elseif v == 3 then - handle:write(" align='flushleft'") + result[#result+1] = " align='flushleft'" end end end -local function emptytag(handle,element,nature,depth) - handle:write("\n",spaces[depth],"<",namespaced[element],"/>\n") +-- flusher + +local linedone = false -- can go ... 
we strip newlines anyway +local inlinedepth = 0 + +local function emptytag(result,element,nature,depth) + if linedone then + result[#result+1] = format("%s<%s/>\n",spaces[depth],namespaced[element]) + else + result[#result+1] = format("\n%s<%s/>\n",spaces[depth],namespaced[element]) + end + linedone = false end -local function begintag(handle,element,nature,depth,di,empty) - local detail, n, fulltag = di.detail, di.n, di.fulltag +local function begintag(result,element,nature,depth,di,skip) + -- if needed we can use a local result with xresult +--~ local result = { } + local detail = di.detail + local n = di.n + local fulltag = di.fulltag + local comment = di.comment if nature == "inline" then linedone = false inlinedepth = inlinedepth + 1 + if show_comment and comment then + result[#result+1] = format("<!-- %s -->",comment) + end elseif nature == "mixed" then if inlinedepth > 0 then + if show_comment and comment then + result[#result+1] = format("<!-- %s -->",comment) + end elseif linedone then - handle:write(spaces[depth]) + result[#result+1] = spaces[depth] + if show_comment and comment then + result[#result+1] = format("<!-- %s -->",comment) + end else - handle:write("\n",spaces[depth]) + result[#result+1] = format("\n%s",spaces[depth]) linedone = false + if show_comment and comment then + result[#result+1] = format("<!-- %s -->\n%s",comment,spaces[depth]) + end end inlinedepth = inlinedepth + 1 else if inlinedepth > 0 then + if show_comment and comment then + result[#result+1] = format("<!-- %s -->",comment) + end elseif linedone then - handle:write(spaces[depth]) + result[#result+1] = spaces[depth] + if show_comment and comment then + result[#result+1] = format("<!-- %s -->",comment) + end else - handle:write("\n",spaces[depth]) + result[#result+1] = format("\n%s",spaces[depth]) -- can introduced extra line in mixed+mixed (filtered later on) linedone = false + if show_comment and comment then + result[#result+1] = format("<!-- %s 
-->\n%s",comment,spaces[depth]) + end end end - handle:write("<",namespaced[element]) - if detail then - handle:write(" detail='",detail,"'") - end - if indexing and n then - handle:write(" n='",n,"'") - end - local extra = extras[element] - if extra then - extra(handle,element,detail,n,fulltag,di) - end - local u = userdata[fulltag] - if u then - for k, v in next, u do - handle:write(format(" %s=%q",k,v)) + if skip == "comment" then + if show_comment then + result[#result+1] = format("<!-- begin %s -->",namespaced[element]) + end + elseif skip then + -- ignore + else + result[#result+1] = format("<%s",namespaced[element]) + if detail then + result[#result+1] = format(" detail=%q",detail) + end + if indexing and n then + result[#result+1] = format(" n=%q",n) + end + local extra = extras[element] + if extra then + extra(result,element,detail,n,fulltag,di) + end + local u = userdata[fulltag] + if u then + for k, v in next, u do + result[#result+1] = format(" %s=%q",k,v) + end end + local a = di.attributes + if a then + for k, v in next, a do + result[#result+1] = format(" %s=%q",k,v) + end + end + result[#result+1] = ">" end - if not empty then - handle:write(">") - if inlinedepth > 0 then - elseif nature == "display" then - handle:write("\n") - linedone = true + if inlinedepth > 0 then + elseif nature == "display" then + result[#result+1] = "\n" + linedone = true + end +--~ xresult[#xresult+1] = concat(result) + used[element][detail or ""] = nature -- for template css + local metadata = tagmetadata[fulltag] + if metadata then + -- used[element] = "mixed" + metadata = table.toxml(metadata,"metadata",true,depth*2,2) -- nobanner + if not linedone then + result[#result+1] = format("\n%s\n",metadata) + else + result[#result+1] = format("%s\n",metadata) end + linedone = true end - used[element][detail or ""] = nature end -local function endtag(handle,element,nature,depth,empty) +local function endtag(result,element,nature,depth,skip) if nature == "display" then if 
inlinedepth == 0 then - if empty then - handle:write("</>\n") - else - if not linedone then - handle:write("\n") + if not linedone then + result[#result+1] = "\n" + end + if skip == "comment" then + if show_comment then + result[#result+1] = format("%s<!-- end %s -->\n",spaces[depth],namespaced[element]) end - handle:write(spaces[depth],"</",namespaced[element],">\n") + elseif skip then + -- ignore + else + result[#result+1] = format("%s</%s>\n",spaces[depth],namespaced[element]) end linedone = true else - if empty then - handle:write("/>") + if skip == "comment" then + if show_comment then + result[#result+1] = format("<!-- end %s -->",namespaced[element]) + end + elseif skip then + -- ignore else - handle:write("</",namespaced[element],">") + result[#result+1] = format("</%s>",namespaced[element]) end end else inlinedepth = inlinedepth - 1 - if empty then - handle:write("/>") + if skip == "comment" then + if show_comment then + result[#result+1] = format("<!-- end %s -->",namespaced[element]) + end + elseif skip then + -- ignore else - handle:write("</",namespaced[element],">") + result[#result+1] = format("</%s>",namespaced[element]) end linedone = false end end -local function push(fulltag,depth,entry) - local attribute, parnumber = entry[3], entry[4] - -- local tg, detail, n = lpegmatch(tagsplitter,fulltag) - local tag, n = lpegmatch(dashsplitter,fulltag) - local tg, detail = lpegmatch(colonsplitter,tag) - local element, nature - if detail then - local pd = properties[tag] - local pt = properties[tg] - element = pd and pd.export or pt and pt.export or tg - nature = pd and pd.nature or pt and pt.nature or "inline" - else - local p = properties[tg] - element = p and p.export or tg - nature = p and p.nature or "inline" - end - local t = { - -- parent = tree, - tg = tg, - fulltag = fulltag, - detail = detail, - n = tonumber(n), -- more efficient - depth = depth, - element = element, - nature = nature, - data = { }, - attribute = attribute, - parnumber = parnumber, 
- -- node = entry[5], -- will go - } - local treedata = tree.data - treedata[#treedata+1] = t - insert(treestack,tree) - tree = t - local h = treehash[fulltag] - if h then - h[#h+1] = t - else - treehash[fulltag] = { t } - end -end - -local function pop() - tree = remove(treestack) -end - -local function flushresult(entry) - local current, content = entry[1], entry[2] - if not content then - -- skip, normally this cannot happen - else - local newdepth, olddepth, content = #current, #treestack, concat(content) - if trace_export then - report_export("%s => %s : handling: %s",olddepth,newdepth,current[newdepth]) - end - if olddepth <= 0 then - for i=1,newdepth do - if trace_export then - report_export("[1] push : %s",current[i]) - end - push(current[i],i,entry) - end - if content then - tree.data[#tree.data+1] = content - end - elseif newdepth < olddepth then - for i=newdepth,olddepth-1 do - if trace_export then - report_export("[2a] pop : %s",current[i]) - end - pop() - end - -- we can have a pagebreak and for instance a new chapter - -- will mess up the structure then - for i=newdepth,1,-1 do - if current[i] ~= treestack[i].fulltag then -- needs checking - if trace_export then - report_export("[2b] pop : %s",current[i]) - end - pop() - else - break - end - end - olddepth = #treestack - for i=olddepth+1,newdepth do - if trace_export then - report_export("[2] push : %s",current[i]) - end - push(current[i],i,entry) - end - if content then - tree.data[#tree.data+1] = content - end - elseif newdepth > olddepth then - for i=olddepth,1,-1 do - if current[i] ~= treestack[i].fulltag then - if trace_export then - report_export("[3] pop : %s",current[i]) - end - pop() +local function flushtree(result,data,nature) + local nofdata = #data + for i=1,nofdata do + local di = data[i] + if not di then -- or di == "" + -- collapsed + elseif type(di) == "string" then +di = utfgsub(di,".",entities) + if i == nofdata and sub(di,-1) == "\n" then + if nature == "inline" or nature == 
"mixed" then + result[#result+1] = sub(di,1,-2) else - break - end - end - olddepth = #treestack - for i=olddepth+1,newdepth do - if trace_export then - report_export("[3] push : %s",current[i]) + result[#result+1] = sub(di,1,-2) + result[#result+1] = " " end - push(current[i],i,entry) - end - if content then - tree.data[#tree.data+1] = content - end - elseif current[newdepth] == treestack[olddepth] then --move up ? - -- continuation - if content then - tree.data[#tree.data+1] = content + else + result[#result+1] = di end - else - for i=olddepth,1,-1 do - if current[i] ~= treestack[i].fulltag then - if trace_export then - report_export("[4] pop : %s",current[i]) - end - pop() - else - break + linedone = false + elseif not di.collapsed then + local element = di.element + if element == "break" or element == "pagebreak" then + emptytag(result,element,nature,di.depth) + elseif element == "" or di.skip == "ignore" then + -- skip + else + if di.before then + flushtree(result,di.before,nature) end - end - olddepth = #treestack - for i=olddepth+1,newdepth do - if trace_export then - report_export("[4] push : %s",current[i]) + local nature, depth, skip = di.nature, di.depth, di.skip + begintag(result,element,nature,depth,di,skip) + flushtree(result,di.data,nature) + endtag(result,element,nature,depth,skip) + if di.after then + flushtree(result,di.after,nature) end - push(current[i],i,entry) - end - if content then - tree.data[#tree.data+1] = content end end end end +-- finalizers + local function checkinserts(data) local nofinserts = 0 for i=1,#data do @@ -876,316 +1209,354 @@ local function checkinserts(data) -- something is wrong end end - if di.data then - checkinserts(di.data) - end - end - end -end - -local function flushtree(handle,data,nature) - local nofdata = #data - for i=1,nofdata do - local di = data[i] - if not di then - -- collapsed - elseif type(di) == "string" then -if i == nofdata and sub(di,-1) == joiner_7 then - if nature == "inline" or nature == "mixed" 
then - handle:write(sub(di,1,-2)) - else - handle:write(sub(di,1,-2)," ") - end -else - handle:write(di) -end - linedone = false - elseif not di.collapsed then - local element = di.element - if element == "break" or element == "pagebreak" then - emptytag(handle,element,nature,di.depth) - else - local nature, depth = di.nature, di.depth - local did = di.data - local nid = #did - if nid == 0 or (nid == 1 and did[1] == "") then - begintag(handle,element,nature,depth,di,true) - -- no content - endtag(handle,element,nature,depth,true) - else - begintag(handle,element,nature,depth,di) - flushtree(handle,did,nature) - endtag(handle,element,nature,depth) - end + local d = di.data + if d then + checkinserts(d) end end end end -local function collapsetree() - for k, v in next, treehash do - local d = v[1].data - local nd = #d - for i=2,#v do - local vi = v[i] - local vd = vi.data - local done = false - local lpn = v[i-1].parnumber - if lpn and lpn == 0 then lpn = nil end - if type(d[1]) ~= "string" then lpn = nil end -- no need anyway so no further testing needed -local justdone = false - for j=1,#vd do - local vdj = vd[j] - if type(vdj) == "string" then ---~ print(vdj) - -- experiment, should be improved - -- can be simplified ... 
lpn instead of done - if done then - nd = nd + 1 - d[nd] = joiner_1 - else - done = true - local pn = vi.parnumber - if not pn then - nd = nd + 1 - d[nd] = joiner_2 - lpn = nil - elseif not lpn then - nd = nd + 1 - d[nd] = joiner_3 - lpn = pn - elseif pn and pn ~= lpn then +--~ local function collapsetree() -- maybe better traverse tree (par stuff) +--~ for tag, trees in next, treehash do +--~ local d = trees[1].data +--~ if d then +--~ local nd = #d +--~ if nd > 0 then +--~ for i=2,#trees do +--~ local currenttree = trees[i] +--~ local currentdata = currenttree.data +--~ local previouspar = trees[i-1].parnumber +--~ currenttree.collapsed = true +--~ if previouspar == 0 or type(currentdata[1]) ~= "string" then +--~ previouspar = nil -- no need anyway so no further testing needed +--~ end +--~ local done = false +--~ local breakdone = false +--~ local spacedone = false +--~ for j=1,#currentdata do +--~ local cd = currentdata[j] +--~ if not cd then +--~ -- skip +--~ elseif type(cd) == "string" then +--~ if cd == "" then +--~ -- skip +--~ elseif cd == " " then +--~ -- done check ? 
+--~ if not spacedone and not breakdone then +--~ nd = nd + 1 +--~ d[nd] = cd +--~ spacedone = true +--~ end +--~ elseif done then +--~ if not spacedone and not breakdone then +--~ nd = nd + 1 +--~ d[nd] = " " +--~ spacedone = true +--~ end +--~ nd = nd + 1 +--~ d[nd] = cd +--~ else +--~ done = true +--~ local currentpar = d.parnumber +--~ if not currentpar then +--~ if not spacedone and not breakdone then +--~ nd = nd + 1 +--~ d[nd] = " " -- +--~ spacedone = true +--~ end +--~ previouspar = nil +--~ elseif not previouspar then +--~ if not spacedone and not breakdone then +--~ nd = nd + 1 +--~ d[nd] = " " +--~ spacedone = true +--~ end +--~ previouspar = currentpar +--~ elseif currentpar ~= previouspar then +--~ if not breakdone then +--~ if not spacedone then +--~ nd = nd + 1 +--~ end +--~ d[nd] = makebreaknode(currenttree) +--~ breakdone = true +--~ end +--~ previouspar = currentpar +--~ else +--~ spacedone = false +--~ breakdone = false +--~ end +--~ nd = nd + 1 +--~ d[nd] = cd +--~ end +--~ else +--~ if cd.tg == "break" then +--~ breakdone = true +--~ end +--~ nd = nd + 1 +--~ d[nd] = cd +--~ end +--~ currentdata[j] = false +--~ end +--~ end +--~ end +--~ end +--~ end +--~ end + +local function collapsetree() -- maybe better traverse tree (par stuff) + for tag, trees in next, treehash do + local d = trees[1].data + if d then + local nd = #d + if nd > 0 then + for i=2,#trees do + local currenttree = trees[i] + local currentdata = currenttree.data + local previouspar = trees[i-1].parnumber + currenttree.collapsed = true + if previouspar == 0 or type(currentdata[1]) ~= "string" then + previouspar = nil -- no need anyway so no further testing needed + end + for j=1,#currentdata do + local cd = currentdata[j] + if not cd or cd == "" then + -- skip + elseif type(cd) == "string" then + local currentpar = d.parnumber + if not currentpar then + -- add space ? + elseif not previouspar then + -- add space ? 
+ elseif currentpar ~= previouspar then + nd = nd + 1 + d[nd] = makebreaknode(currenttree) + end + previouspar = currentpar nd = nd + 1 - d[nd] = makebreaknode(vi) - lpn = pn + d[nd] = cd else - -- nd = nd + 1 - -- d[nd] = joiner_4 -- we need to be more clever + nd = nd + 1 + d[nd] = cd end + currentdata[j] = false end - else ---~ nd = nd + 1 ---~ d[nd] = joiner_3 - -- lpn = nil end - if vdj ~= "" then - nd = nd + 1 - d[nd] = vdj -- hm, any? - end - vd[j] = false end - v[i].collapsed = true end end end -local function prunetree(tree) - if not tree.collapsed then - local data = tree.data - if data then - local p, np = { }, 0 - for i=1,#data do - local d = data[i] - if type(d) == "table" then - if not d.collapsed then - prunetree(d) - np = np + 1 - p[np] = d - end - elseif type(d) == "string" then - np = np + 1 - p[np] = d - end +local function indextree(tree) + local data = tree.data + if data then + for i=1,#data do + local d = data[i] + if type(d) == "table" then + d.__i__ = i + d.__p__ = tree + indextree(d) end - tree.data = np > 0 and p end end end -local function finishexport() - if entry then - local result = entry[2] - if result and result[#result] == " " then - result[#result] = nil -- nicer, remove last space - end - flushresult(entry) +-- collector code + +local function push(fulltag,depth) + local tag, n = lpegmatch(dashsplitter,fulltag) + local tg, detail = lpegmatch(colonsplitter,tag) + local element, nature + if detail then + local pd = properties[tag] + local pt = properties[tg] + element = pd and pd.export or pt and pt.export or tg + nature = pd and pd.nature or pt and pt.nature or defaultnature + else + local p = properties[tg] + element = p and p.export or tg + nature = p and p.nature or "inline" end - for i=#treestack,1,-1 do - pop() + local treedata = tree.data + local t = { + tg = tg, + fulltag = fulltag, + detail = detail, + n = tonumber(n), -- more efficient + depth = depth, + element = element, + nature = nature, + data = { }, + attribute = 
currentattribute, + parnumber = currentparagraph, + } + treedata[#treedata+1] = t + currentdepth = currentdepth + 1 + nesting[currentdepth] = fulltag + treestack[currentdepth] = tree + if trace_export then + report_export("%s<%s trigger='%s' index='%s'>",spaces[currentdepth-1],fulltag,currentattribute,#treedata) + end + tree = t + if tg == "break" then + -- no need for this + else + local h = treehash[fulltag] + if h then + h[#h+1] = t + else + treehash[fulltag] = { t } + end end end -local displaymapping = { - inline = "inline", - display = "block", - mixed = "inline", -} - -local e_template = [[ -%s { - display: %s ; -}]] - -local d_template = [[ -%s[detail=%s] { - display: %s ; -}]] - --- encoding='utf-8' - -local xmlpreamble = [[ -<?xml version='1.0' encoding='UTF-8' standalone='yes' ?> - -<!-- input filename : %- 17s --> -<!-- processing date : %- 17s --> -<!-- context version : %- 17s --> -<!-- exporter version : %- 17s --> -]] - -local csspreamble = [[ - -<?xml-stylesheet type="text/css" href="%s"?> -]] - -local cssfile, xhtmlfile = nil, nil - -directives.register("backend.export.css", function(v) cssfile = v end) -directives.register("backend.export.xhtml",function(v) xhtmlfile = v end) - -local function stopexport(v) - starttiming(treehash) - report_export("finalizing") - finishexport() - collapsetree() - if trace_tree then - prunetree(tree) - report_export(table.serialize(tree,"root")) +local function pop() + local top = nesting[currentdepth] + tree = treestack[currentdepth] + currentdepth = currentdepth - 1 + if trace_export then + if top then + report_export("%s</%s>",spaces[currentdepth],top) + else + report_export("</%s>",top) + end end - checkinserts(tree.data) - hashlistdata() - if type(v) ~= "string" or v == variables.yes or v == "" then - v = tex.jobname +end + +local function continueexport() + if nofcurrentcontent > 0 then + if trace_export then + report_export("%s<!-- injecting pagebreak space -->",spaces[currentdepth]) + end + 
nofcurrentcontent = nofcurrentcontent + 1 + currentcontent[nofcurrentcontent] = " " -- pagebreak end - local xmlfile = file.addsuffix(v,"export") - local handle = io.open(xmlfile,"wb") - if handle then - local files = { } - local specification = { - name = file.removesuffix(v), - identifier = os.uuid(), - files = files, - } - report_export("saving xml data in '%s",xmlfile) - handle:write(format(xmlpreamble,tex.jobname,os.date(),environment.version,version)) - if type(cssfile) == "string" then - local cssfiles = settings_to_array(cssfile) - for i=1,#cssfiles do - local cssfile = cssfiles[i] - files[#files+1] = cssfile - if type(cssfile) ~= "string" or cssfile == variables.yes or cssfile == "" or cssfile == xmlfile then - cssfile = file.replacesuffix(xmlfile,"css") - else - cssfile = file.addsuffix(cssfile,"css") - end - report_export("adding css reference '%s",cssfile) - handle:write(format(csspreamble,cssfile)) - end +end + +local function pushentry(current) + if current then + if restart then + continueexport() + restart = false end - flushtree(handle,tree.data) - handle:close() - -- css template file - local cssfile = file.replacesuffix(xmlfile,"template") - report_export("saving css template in '%s",cssfile) - local templates = { format("/* template for file %s */",xmlfile) } - for element, details in table.sortedhash(used) do - templates[#templates+1] = format("/* category: %s */",element) - for detail, nature in table.sortedhash(details) do - local d = displaymapping[nature or "display"] or "block" - if detail == "" then - templates[#templates+1] = format(e_template,element,d) - else - templates[#templates+1] = format(d_template,element,detail,d) - end - end + local newdepth = #current + local olddepth = currentdepth + if trace_export then + report_export("%s<!-- moving from depth %s to %s (%s) -->",spaces[currentdepth],olddepth,newdepth,current[newdepth]) end - io.savedata(cssfile,concat(templates,"\n\n")) - -- xhtml references - if xhtmlfile then - -- messy 
- if type(v) ~= "string" or xhtmlfile == true or xhtmlfile == variables.yes or xhtmlfile == "" or xhtmlfile == xmlfile then - xhtmlfile = file.replacesuffix(xmlfile,"xhtml") - else - xhtmlfile = file.addsuffix(xhtmlfile,"xhtml") + if olddepth <= 0 then + for i=1,newdepth do + push(current[i],i) end - report_export("saving xhtml variant in '%s",xhtmlfile) - local xmltree = xml.load(xmlfile) - if xmltree then - local xmlwrap = xml.wrap - for e in xml.collected(xmltree,"/document") do - e.at["xmlns:xhtml"] = "http://www.w3.org/1999/xhtml" - break - end - local wrapper = { tg = "a", ns = "xhtml", at = { href = "unknown" } } - for e in xml.collected(xmltree,"link") do - local location = e.at.location - if location then - wrapper.at.href = "#" .. gsub(location,":","_") - xmlwrap(e,wrapper) + else + local difference + if olddepth < newdepth then + for i=1,olddepth do + if current[i] ~= nesting[i] then + difference = i + break end end - local wrapper = { tg = "a", ns = "xhtml", at = { name = "unknown" } } - for e in xml.collected(xmltree,"!link[@location]") do - local location = e.at.location - if location then - wrapper.at.name = gsub(location,":","_") - xmlwrap(e,wrapper) + else + for i=1,newdepth do + if current[i] ~= nesting[i] then + difference = i + break end end - xml.save(xmltree,xhtmlfile) end - files[#files+1] = xhtmlfile - specification.root = xhtmlfile - local specfile = file.replacesuffix(xmlfile,"specification") - report_export("saving specification in '%s' (mtxrun --script epub --make %s)",specfile,specfile) - io.savedata(specfile,table.serialize(specification,true)) + if difference then + for i=olddepth,difference,-1 do + pop() + end + for i=difference,newdepth do + push(current[i],i) + end + elseif newdepth > olddepth then + for i=olddepth+1,newdepth do + push(current[i],i) + end + elseif newdepth < olddepth then + for i=olddepth,newdepth,-1 do + pop() + end + elseif trace_export then + report_export("%s<!-- staying at depth %s (%s) 
-->",spaces[currentdepth],newdepth,nesting[newdepth] or "?") + end end - else - report_export("unable to saving xml in '%s",xmlfile) + return olddepth, newdepth end - stoptiming(treehash) end -local function startexport(v) - if v and not exporting then - nodes.tasks.appendaction("shipouts", "normalizers", "nodes.handlers.export") - report_export("enabling export to xml") - luatex.registerstopactions(function() stopexport(v) end) - if trace_spaces then - joiner_1 = "<S1/>" joiner_2 = "<S2/>" joiner_3 = "<S3/>" joiner_4 = "<S4/>" joiner_5 = "<S5/>" - joiner_6 = "<S6/>" joiner_7 = "<S7/>" joiner_8 = "<S8/>" joiner_9 = "<S9/>" joiner_0 = "<S0/>" +local function pushcontent(addbreak) + if nofcurrentcontent > 0 then + if addbreak then + if currentcontent[nofcurrentcontent] == "\n" then + if trace_export then + report_export("%s<!-- removing newline -->",spaces[currentdepth]) + end + nofcurrentcontent = nofcurrentcontent - 1 + end end - exporting = true + local content = concat(currentcontent,"",1,nofcurrentcontent) + if content == "" then + -- omit; when addbreak we could push, remove spaces, pop +--~ elseif content == " " and addbreak then + elseif somespace[content] and addbreak then + -- omit; when addbreak we could push, remove spaces, pop + else + local olddepth, newdepth + local list = taglist[currentattribute] + if list then + olddepth, newdepth = pushentry(list) + end + local td = tree.data + local nd = #td + td[nd+1] = content + if trace_export then + report_export("%s<!-- start content with length %s -->",spaces[currentdepth],#content) + report_export("%s%s",spaces[currentdepth],content) + report_export("%s<!-- stop content -->",spaces[currentdepth]) + end + if olddepth then + for i=newdepth-1,olddepth,-1 do + pop() + end + end + end + nofcurrentcontent = 0 + end + if addbreak then + pushentry(makebreaklist(currentnesting)) + -- if trace_export then + -- report_export("%s<!-- add break -->",spaces[currentdepth]) + -- end end end 
-directives.register("backend.export",startexport) -- maybe .name - -local function injectbreak() - flushresult(entry) - flushresult(makebreak(entry)) - result = { } - entry = { entry[1], result, last, lastpar } -- entry[1] ? -end - -local function injectspace(a,joiner) - flushresult(entry) - result = { joiner } - local tl = taglist[a] - entry = { tl , result, a, lastpar, n } +local function finishexport() + if trace_export then + report_export("%s<!-- start finalizing -->",spaces[currentdepth]) + end + if nofcurrentcontent > 0 then + if somespace[currentcontent[nofcurrentcontent]] then + if trace_export then + report_export("%s<!-- removing space -->",spaces[currentdepth]) + end + nofcurrentcontent = nofcurrentcontent - 1 + end + pushcontent() + end + for i=currentdepth,1,-1 do + pop() + end + currentcontent = { } -- we're nice and do a cleanup + if trace_export then + report_export("%s<!-- stop finalizing -->",spaces[currentdepth]) + end end -local function collectresults(head,list,p) - local preceding = p or false +local function collectresults(head,list) for n in traverse_nodes(head) do local id = n.id -- 14: image, 8: literal (mp) if id == glyph_code then @@ -1198,23 +1569,19 @@ local function collectresults(head,list,p) -- we could add tonunicodes for ligatures (todo) local components = n.components if components then -- we loose data - collectresults(components,nil,preceding) ---~ preceding = true + collectresults(components,nil) else + local c = n.char if last ~= at then local tl = taglist[at] - if entry then - flushresult(entry) - end - if preceding then - preceding = false - result = { joiner_5 } - else - result = { } + if trace_export then + report_export("%s<!-- processing glyph %s (tag %s) -->",spaces[currentdepth],utfchar(c),at) end - lastpar = has_attribute(n,a_taggedpar) - entry = { tl, result, at, lastpar, n } - local ah = { -- this includes detail ! 
+ pushcontent() + currentparagraph = has_attribute(n,a_taggedpar) + currentnesting = tl + currentattribute = at + local ah = { -- this includes detail ! -- we can move some to te tex end align = has_attribute(n,a_taggedalign ), columns = has_attribute(n,a_taggedcolumns), rows = has_attribute(n,a_taggedrows ), @@ -1228,22 +1595,62 @@ local function collectresults(head,list,p) attributehash[tl[#tl]] = ah end last = at + pushentry(currentnesting) elseif last then local at = has_attribute(n,a_taggedpar) - if at ~= lastpar then - injectbreak() - lastpar = at + if at ~= currentparagraph then + -- inject break + pushcontent(true) -- add break + pushentry(currentnesting) + currentattribute = last + currentparagraph = at + end + if trace_export then + report_export("%s<!-- processing glyph %s (tag %s) -->",spaces[currentdepth],utfchar(c),last) + end + else + if trace_export then + report_export("%s<!-- processing glyph %s (tag %s) -->",spaces[currentdepth],utfchar(c),at) end end - local c = n.char - if c == 0x26 then - result[#result+1] = "&" - elseif c == 0x3E then - result[#result+1] = ">" - elseif c == 0x3C then - result[#result+1] = "<" - elseif c == 0 then - result[#result+1] = "" -- utfchar(0) -- todo: check if "" is needed + local s = has_attribute(n,a_exportstatus) + if s then + c = s + end + if c == 0 then + if trace_export then + report_export("%s<!-- skipping last glyph -->",spaces[currentdepth]) + end + -- skip +--~ elseif c == 0x26 then +--~ nofcurrentcontent = nofcurrentcontent + 1 +--~ currentcontent[nofcurrentcontent] = "&" +--~ if trace_export then +--~ report_export("%s<!-- turning last glyph into entity &-->",spaces[currentdepth]) +--~ end +--~ elseif c == 0x3E then +--~ nofcurrentcontent = nofcurrentcontent + 1 +--~ currentcontent[nofcurrentcontent] = ">" +--~ if trace_export then +--~ report_export("%s<!-- turning last glyph into entity >-->",spaces[currentdepth]) +--~ end +--~ elseif c == 0x3C then +--~ nofcurrentcontent = nofcurrentcontent + 1 +--~ 
currentcontent[nofcurrentcontent] = "<" +--~ if trace_export then +--~ report_export("%s<!-- turning last glyph into entity <-->",spaces[currentdepth]) +--~ end + elseif c == 0x20 then + local a = has_attribute(n,a_characters) + nofcurrentcontent = nofcurrentcontent + 1 + if a then + if trace_export then + report_export("%s<!-- turning last space into special space U+%05X -->",spaces[currentdepth],a) + end + currentcontent[nofcurrentcontent] = specialspaces[a] -- special space + else + currentcontent[nofcurrentcontent] = " " + end else local fc = fontchar[n.font] if fc then @@ -1251,17 +1658,22 @@ local function collectresults(head,list,p) if fc then local u = fc.tounicode if u and u ~= "" then + -- tracing for s in gmatch(u,"....") do -- is this ok? - result[#result+1] = utfchar(tonumber(s,16)) + nofcurrentcontent = nofcurrentcontent + 1 + currentcontent[nofcurrentcontent] = utfchar(tonumber(s,16)) end else - result[#result+1] = utfchar(c) + nofcurrentcontent = nofcurrentcontent + 1 + currentcontent[nofcurrentcontent] = utfchar(c) end else -- weird, happens in hz (we really need to get rid of the pseudo fonts) - result[#result+1] = utfchar(c) + nofcurrentcontent = nofcurrentcontent + 1 + currentcontent[nofcurrentcontent] = utfchar(c) end else - result[#result+1] = utfchar(c) + nofcurrentcontent = nofcurrentcontent + 1 + currentcontent[nofcurrentcontent] = utfchar(c) end end end @@ -1270,112 +1682,357 @@ local function collectresults(head,list,p) local ai = has_attribute(n,a_image) if ai then local at = has_attribute(n,a_tagged) - if entry then - flushresult(entry) - result = { } - entry[2] = result -- mess, to be sorted out, but otherwise duplicates (still some spacing issues) + if nofcurrentcontent > 0 then + pushcontent() + pushentry(currentnesting) -- ?? 
end local tl = taglist[at] local i = locate_node(n,whatsit_code,refximage_code) if i then attributehash[tl[#tl]] = { imageindex = i.index } end - flushresult { tl, { }, 0, 0 } -- has an index, todo: flag empty element + pushentry(tl) -- has an index, todo: flag empty element + if trace_export then + report_export("%s<!-- processing image (tag %s)",spaces[currentdepth],last) + end last = nil - lastpar = nil + currentparagraph = nil else ---~ if result and #result > 0 then -- and n.subtype == line_code then ---~ local r = result[#result] ---~ if type(r) == "string" and r ~= " " then ---~ local s = utfsub(r,-1) ---~ if s == hyphen then ---~ result[#result] = utfsub(r,1,-2) ---~ elseif s ~= joiner_7 then ---~ result[#result] = r .. joiner_7 ---~ --~ preceding = true ---~ end ---~ end ---~ preceding = false ---~ end -- we need to determine an end-of-line - preceding = collectresults(n.list,n,preceding) - preceding = false + collectresults(n.list,n) end elseif id == disc_code then -- probably too late collectresults(n.replace,nil) - preceding = false elseif id == glue_code then -- we need to distinguish between hskips and vskips local subtype = n.subtype - if subtype == userskip_code then -- todo space_code - if n.spec.width > threshold then ---~ preceding = true - if result and last and #result > 0 and result[#result] ~= " " then + if subtype == userskip_code then + local ca = has_attribute(n,a_characters) + if ca then + if ca == 0 then + -- skip this one ... 
already converted special character (node-acc) + else + local a = has_attribute(n,a_tagged) + if somespace[currentcontent[nofcurrentcontent]] then + if trace_export then + report_export("%s<!-- removing space -->",spaces[currentdepth]) + end + nofcurrentcontent = nofcurrentcontent - 1 + end + if last ~= a then + pushcontent() + last = a + currentnesting = taglist[last] + pushentry(currentnesting) + currentattribute = last + end + nofcurrentcontent = nofcurrentcontent + 1 + currentcontent[nofcurrentcontent] = specialspaces[ca] -- utfchar(ca) + if trace_export then + report_export("%s<!-- adding special space/glue (tag %s => %s) -->",spaces[currentdepth],last,a) + end + end + elseif n.spec.width > threshold then +--~ if has_attribute(n,a_textblock) then +--~ -- todo +--~ else + if last and not somespace[currentcontent[nofcurrentcontent]] then local a = has_attribute(n,a_tagged) if a == last then - result[#result+1] = joiner_6 - preceding = false + if trace_export then + report_export("%s<!-- injecting spacing 5a -->",spaces[currentdepth]) + end + nofcurrentcontent = nofcurrentcontent + 1 + currentcontent[nofcurrentcontent] = " " elseif a then -- e.g LOGO<space>LOGO - preceding = false + if trace_export then + report_export("%s<!-- processing glue > threshold (tag %s => %s) -->",spaces[currentdepth],last,a) + end + pushcontent() + if trace_export then + report_export("%s<!-- injecting spacing 5b -->",spaces[currentdepth]) + end last = a - injectspace(last,joiner_6) + nofcurrentcontent = nofcurrentcontent + 1 + currentcontent[nofcurrentcontent] = " " + currentnesting = taglist[last] + pushentry(currentnesting) + currentattribute = last end end end +--~ end + elseif subtype == spaceskip_code or subtype == xspaceskip_code then + if not somespace[currentcontent[nofcurrentcontent]] then + if trace_export then + report_export("%s<!-- injecting spacing 7 -->",spaces[currentdepth]) + end + nofcurrentcontent = nofcurrentcontent + 1 + currentcontent[nofcurrentcontent] = " " + 
end elseif subtype == rightskip_code or subtype == parfillskip_code then -if result and #result > 0 then -- and n.subtype == line_code then - local r = result[#result] - if type(r) == "string" and r ~= " " then - local s = utfsub(r,-1) - if s == hyphen then - result[#result] = utfsub(r,1,-2) - elseif s ~= joiner_7 then - result[#result] = r .. joiner_7 ---~ preceding = true - end - end - preceding = false -end + if nofcurrentcontent > 0 then -- and n.subtype == line_code then + local r = currentcontent[nofcurrentcontent] + if type(r) == "string" and r ~= " " then + local s = utfsub(r,-1) + if s == hyphen then + currentcontent[nofcurrentcontent] = utfsub(r,1,-2) + elseif s ~= "\n" then + if trace_export then + report_export("%s<!-- injecting newline 1 -->",spaces[currentdepth]) + end + nofcurrentcontent = nofcurrentcontent + 1 + currentcontent[nofcurrentcontent] = "\n" + end + end + end end elseif id == kern_code then if n.kern > threshold then ---~ preceding = true - if result and last and #result > 0 and result[#result] ~= " " then + if last and not somespace[currentcontent[nofcurrentcontent]] then local a = has_attribute(n,a_tagged) if a == last then - result[#result+1] = joiner_8 - preceding = false + if not somespace[currentcontent[nofcurrentcontent]] then + if trace_export then + report_export("%s<!-- injecting spacing 8 -->",spaces[currentdepth]) + end + nofcurrentcontent = nofcurrentcontent + 1 + currentcontent[nofcurrentcontent] = " " + end elseif a then -- e.g LOGO<space>LOGO - preceding = false + if trace_export then + report_export("%s<!-- processing kern > threshold (tag %s => %s)",spaces[currentdepth],last,a) + end last = a - injectspace(last,joiner_8) + pushcontent() + if trace_export then + report_export("%s<!-- injecting spacing 9 -->",spaces[currentdepth]) + end + nofcurrentcontent = nofcurrentcontent + 1 + currentcontent[nofcurrentcontent] = " " + currentnesting = taglist[last] + pushentry(currentnesting) + currentattribute = last end end end end 
-- (tail of a definition that starts before this excerpt; kept as found)
    end
end

-- Shipout hook: collects the export results for the page that is being
-- flushed. The time spent is accumulated under 'treehash'. The head of the
-- node list is handed back unchanged, followed by 'true'.

function nodes.handlers.export(head) -- hooks into the page builder
    starttiming(treehash)
    if trace_export then
        report_export("%s<!-- start flushing page -->",spaces[currentdepth])
    end
 -- continueexport()
    restart = true
    collectresults(head)
    if trace_export then
        report_export("%s<!-- stop flushing page -->",spaces[currentdepth])
    end
    stoptiming(treehash)
    return head, true
end

-- Gives each call its own text block number and stores that number in the
-- a_textblock attribute of the hlist nodes whose subtype is line_code. The
-- attribute is set to 0 when the subtype equals glue_code or kern_code.
-- Always returns false.
--
-- NOTE(review): only hlist nodes are visited here, so please confirm that
-- comparing their subtype with glue_code / kern_code is intended.

function builders.paragraphs.tag(head)
    noftextblocks = noftextblocks + 1
    for n in traverse_id(hlist_code,head) do
        local subtype = n.subtype
        if subtype == line_code then
            set_attribute(n,a_textblock,noftextblocks)
        elseif subtype == glue_code or subtype == kern_code then
            set_attribute(n,a_textblock,0)
        end
    end
    return false
end

-- wrapper

-- maps the nature of an element onto a css display value

local displaymapping = {
    inline  = "inline",
    display = "block",
    mixed   = "inline",
}

-- css rule for an element without detail

local e_template = [[
%s {
    display: %s ;
}]]

-- css rule for an element with a detail

local d_template = [[
%s[detail=%s] {
    display: %s ;
}]]

-- encoding='utf-8'

local xmlpreamble = [[
<?xml version='1.0' encoding='UTF-8' standalone='yes' ?>

<!-- input filename : %- 17s -->
<!-- processing date : %- 17s -->
<!-- context version : %- 17s -->
<!-- exporter version : %- 17s -->
]]

local csspreamble = [[

<?xml-stylesheet type="text/css" href="%s"?>
]]

-- local xhtmlpreamble = [[
--     <!DOCTYPE html PUBLIC
--         "-//W3C//DTD XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN"
--         "http://www.w3.org/2002/04/xhtml-math-svg/xhtml-math-svg.dtd"
--     >
-- ]]

-- both are set by the directives below

local cssfile, xhtmlfile = nil, nil

directives.register("backend.export.css",  function(v) cssfile   = v end)
directives.register("backend.export.xhtml",function(v) xhtmlfile = v end)

-- Finishes the export and writes the results:
--
-- * <name>.export        : the xml tree, preceded by the xml preamble and
--                          one stylesheet reference per requested css file
-- * <name>.template      : a css template with one rule per used element
--                          (only when a css file was requested)
-- * <name>.xhtml         : a variant in which elements with a location are
--                          wrapped in xhtml anchors (only when requested)
-- * <name>.specification : the serialized list of files (only written
--                          together with the xhtml variant)
--
-- v : the requested name; when it is not a string, or when it is empty or
--     equals variables.yes, the job name is used instead

local function stopexport(v)
    starttiming(treehash)
    finishexport()
    collapsetree(tree)
    indextree(tree)
    checkinserts(tree.data)
    hashlistdata()
    if type(v) ~= "string" or v == variables.yes or v == "" then
        v = tex.jobname
    end
    local xmlfile = file.addsuffix(v,"export")
    if type(cssfile) ~= "string" or cssfile == "" then
        cssfile = nil
    end
    local files = { }
    local specification = {
        name       = file.removesuffix(v),
        identifier = os.uuid(),
        files      = files,
    }
    report_export("saving xml data in '%s'",xmlfile)
    local results = { }
    results[#results+1] = format(xmlpreamble,tex.jobname,os.date(),environment.version,exportversion)
    if cssfile then
        local cssfiles = settings_to_array(cssfile)
        for i=1,#cssfiles do
            local name = cssfiles[i]
            if type(name) ~= "string" or name == variables.yes or name == "" or name == xmlfile then
                name = file.replacesuffix(xmlfile,"css")
            else
                name = file.addsuffix(name,"css")
            end
            -- we register the resolved name, so that the specification
            -- lists the file that is actually referenced in the xml
            files[#files+1] = name
            report_export("adding css reference '%s'",name)
            results[#results+1] = format(csspreamble,name)
        end
    end
    -- collect tree
    local result = { }
    flushtree(result,tree.data)
    result = concat(result)
    result = gsub(result,"\n *\n","\n") -- get rid of lines with only spaces
    results[#results+1] = result
    results = concat(results)
    -- if needed we can do a cleanup of the tree (no need to load for xhtml then)
    -- write to file
    io.savedata(xmlfile,results)
    -- css template file
    if cssfile then
        local templatefile = file.replacesuffix(xmlfile,"template")
        report_export("saving css template in '%s'",templatefile)
        local templates = { format("/* template for file %s */",xmlfile) }
        for element, details in table.sortedhash(used) do
            templates[#templates+1] = format("/* category: %s */",element)
            for detail, nature in table.sortedhash(details) do
                local d = displaymapping[nature or "display"] or "block"
                if detail == "" then
                    templates[#templates+1] = format(e_template,element,d)
                else
                    templates[#templates+1] = format(d_template,element,detail,d)
                end
            end
        end
        io.savedata(templatefile,concat(templates,"\n\n"))
    end
    -- xhtml references
    if xhtmlfile then
        -- messy: v has been turned into a string above, so here we have to
        -- look at the type of xhtmlfile itself
        if type(xhtmlfile) ~= "string" or xhtmlfile == variables.yes or xhtmlfile == "" or xhtmlfile == xmlfile then
            xhtmlfile = file.replacesuffix(xmlfile,"xhtml")
        else
            xhtmlfile = file.addsuffix(xhtmlfile,"xhtml")
        end
        report_export("saving xhtml variant in '%s'",xhtmlfile)
     -- local xmltree = xml.load(xmlfile)
        local xmltree = xml.convert(results)
        if xmltree then
            local xmlwrap = xml.wrap
            -- only the first match gets the namespace declaration
            for e in xml.collected(xmltree,"/document") do
                e.at["xmlns:xhtml"] = "http://www.w3.org/1999/xhtml"
                break
            end
            -- todo: inject xhtmlpreamble (xmlns should have be enough)
            -- links become <xhtml:a href="#..."> and the other elements
            -- that have a location become <xhtml:a name="...">
            local wrapper = { tg = "a", ns = "xhtml", at = { href = "unknown" } }
            for e in xml.collected(xmltree,"link") do
                local location = e.at.location
                if location then
                    wrapper.at.href = "#" .. gsub(location,":","_")
                    xmlwrap(e,wrapper)
                end
            end
            local wrapper = { tg = "a", ns = "xhtml", at = { name = "unknown" } }
            for e in xml.collected(xmltree,"!link[@location]") do
                local location = e.at.location
                if location then
                    wrapper.at.name = gsub(location,":","_")
                    xmlwrap(e,wrapper)
                end
            end
            xml.save(xmltree,xhtmlfile)
        end
        files[#files+1] = xhtmlfile
        specification.root = xhtmlfile
        local specfile = file.replacesuffix(xmlfile,"specification")
        report_export("saving specification in '%s' (mtxrun --script epub --make %s)",specfile,specfile)
        io.savedata(specfile,table.serialize(specification,true))
    end
    stoptiming(treehash)
end

local appendaction = nodes.tasks.appendaction
local enableaction = nodes.tasks.enableaction

-- Enables the exporter (once): the export handler is appended to the
-- shipout normalizers, the accessibility and math tag handlers are
-- enabled, and stopexport is registered as stop action with the same v.

local function startexport(v)
    if v and not exporting then
        report_export("enabling export to xml")
     -- not yet known in task-ini
        appendaction("shipouts","normalizers","nodes.handlers.export")
     -- enableaction("shipouts","nodes.handlers.export")
     --
        enableaction("shipouts","nodes.handlers.accessibility")
        enableaction("math","noads.handlers.tags")
     --~ enableaction("finalizers","builders.paragraphs.tag")
        luatex.registerstopactions(function() stopexport(v) end)
        exporting = true
    end
end

directives.register("backend.export",startexport) -- maybe .name

-- nothing is reported when the exporter has not been enabled

statistics.register("xml exporting time", function()
    if exporting then
        return format("%s seconds, version %s", statistics.elapsedtime(treehash),exportversion)
    end
end)

commands.settagitemgroup     = structurestags.setitemgroup
commands.settagsynonym       = structurestags.setsynonym
commands.settagsorting       = structurestags.setsorting
commands.settagdescriptionid = structurestags.setdescriptionid