diff options
Diffstat (limited to 'tex/context/base/math-tag.lua')
-rw-r--r-- | tex/context/base/math-tag.lua | 637 |
1 files changed, 422 insertions, 215 deletions
diff --git a/tex/context/base/math-tag.lua b/tex/context/base/math-tag.lua index ab5902dd4..0d900b3a1 100644 --- a/tex/context/base/math-tag.lua +++ b/tex/context/base/math-tag.lua @@ -6,15 +6,30 @@ if not modules then modules = { } end modules ['math-tag'] = { license = "see context related readme files" } +-- todo: have a local list with local tags that then get appended + -- use lpeg matchers local find, match = string.find, string.match -local insert, remove = table.insert, table.remove +local insert, remove, concat = table.insert, table.remove, table.concat + +local attributes = attributes +local nodes = nodes -local attributes, nodes = attributes, nodes +local nuts = nodes.nuts +local tonut = nuts.tonut -local set_attributes = nodes.setattributes -local traverse_nodes = node.traverse +local getnext = nuts.getnext +local getid = nuts.getid +local getchar = nuts.getchar +local getlist = nuts.getlist +local getfield = nuts.getfield +local getsubtype = nuts.getsubtype +local getattr = nuts.getattr +local setattr = nuts.setattr + +local set_attributes = nuts.setattributes +local traverse_nodes = nuts.traverse local nodecodes = nodes.nodecodes @@ -31,15 +46,32 @@ local math_style_code = nodecodes.style -- attr style local math_choice_code = nodecodes.choice -- attr display text script scriptscript local math_fence_code = nodecodes.fence -- attr subtype +local accentcodes = nodes.accentcodes + +local math_fixed_top = accentcodes.fixedtop +local math_fixed_bottom = accentcodes.fixedbottom +local math_fixed_both = accentcodes.fixedboth + +local kerncodes = nodes.kerncodes + +local fontkern_code = kerncodes.fontkern + local hlist_code = nodecodes.hlist local vlist_code = nodecodes.vlist local glyph_code = nodecodes.glyph +local disc_code = nodecodes.disc local glue_code = nodecodes.glue +local kern_code = nodecodes.kern +local math_code = nodecodes.math + +local processnoads = noads.process local a_tagged = attributes.private('tagged') +local a_taggedpar = attributes.private('taggedpar') local a_exportstatus = attributes.private('exportstatus') local a_mathcategory = attributes.private('mathcategory') local a_mathmode = attributes.private('mathmode') +local a_fontkern = attributes.private('fontkern') local tags = structures.tags @@ -55,31 +87,53 @@ local mathcodes = mathematics.codes local ordinary_code = mathcodes.ordinary local variable_code = mathcodes.variable +local fromunicode16 = fonts.mappings.fromunicode16 +local font_of_family = node.family_font +local fontcharacters = fonts.hashes.characters + +local report_tags = logs.reporter("structure","tags") + local process local function processsubsup(start) -- At some point we might need to add an attribute signaling the -- super- and subscripts because TeX and MathML use a different - -- order. - local nucleus, sup, sub = start.nucleus, start.sup, start.sub + -- order. The mrows are needed to keep mn's separated. + local nucleus = getfield(start,"nucleus") + local sup = getfield(start,"sup") + local sub = getfield(start,"sub") if sub then if sup then - start[a_tagged] = start_tagged("msubsup") + setattr(start,a_tagged,start_tagged("msubsup")) + -- start_tagged("mrow") process(nucleus) + -- stop_tagged() + start_tagged("mrow") process(sub) + stop_tagged() + start_tagged("mrow") process(sup) stop_tagged() + stop_tagged() else - start[a_tagged] = start_tagged("msub") + setattr(start,a_tagged,start_tagged("msub")) + -- start_tagged("mrow") process(nucleus) + -- stop_tagged() + start_tagged("mrow") process(sub) stop_tagged() + stop_tagged() end elseif sup then - start[a_tagged] = start_tagged("msup") + setattr(start,a_tagged,start_tagged("msup")) + -- start_tagged("mrow") process(nucleus) + -- stop_tagged() + start_tagged("mrow") process(sup) stop_tagged() + stop_tagged() else process(nucleus) end @@ -90,254 +144,407 @@ end -- todo: variants -> original local actionstack = { } +local fencesstack = { } + +-- glyph nodes and such can happen in under and over stuff + +local function getunicode(n) -- instead of getchar + local char = getchar(n) + local font = font_of_family(getfield(n,"fam")) -- font_of_family + local data = fontcharacters[font][char] + return data.unicode or char +end + +------------------- + +local content = { } +local found = false + +content[math_char_code] = function() found = true end + +local function hascontent(head) + found = false + processnoads(head,content,"content") + return found +end + +-------------------- + +local function showtag(n,id) + local attr = getattr(n,a_tagged) + report_tags("%s = %s",nodecodes[id or getid(n)],attr and taglist[attr].tagname or "?") +end process = function(start) -- we cannot use the processor as we have no finalizers (yet) + local mtexttag = nil while start do - local id = start.id - if id == math_char_code then - local char = start.char - -- check for code - local a = start[a_mathcategory] - if a then - a = { detail = a } + local id = getid(start) + +-- showtag(start,id) + + if id == glyph_code or id == disc_code then + if not mtexttag then + mtexttag = start_tagged("mtext") end - local code = getmathcode(char) - if code then - code = code[1] + setattr(start,a_tagged,mtexttag) + elseif mtexttag and id == kern_code and (getsubtype(start) == fontkern_code or getattr(start,a_fontkern)) then + setattr(start,a_tagged,mtexttag) + else + if mtexttag then + stop_tagged() + mtexttag = nil end - local tag - if code == ordinary_code or code == variable_code then - local ch = chardata[char] - local mc = ch and ch.mathclass - if mc == "number" then - tag = "mn" - elseif mc == "variable" or not mc then -- variable is default - tag = "mi" + if id == math_char_code then + local char = getchar(start) + local code = getmathcode(char) + if code then + code = code[1] + end + local tag + if code == ordinary_code or code == variable_code then + local ch = chardata[char] + local mc = ch and ch.mathclass + if mc == "number" then + tag = "mn" + elseif mc == "variable" or not mc then -- variable is default + tag = "mi" + else + tag = "mo" + end else tag = "mo" end - else - tag = "mo" - end - start[a_tagged] = start_tagged(tag,a) - stop_tagged() - break -- okay? - elseif id == math_textchar_code then - -- check for code - local a = start[a_mathcategory] - if a then - start[a_tagged] = start_tagged("ms",{ detail = a }) - else - start[a_tagged] = start_tagged("ms") - end - stop_tagged() - break - elseif id == math_delim_code then - -- check for code - start[a_tagged] = start_tagged("mo") - stop_tagged() - break - elseif id == math_style_code then - -- has a next - elseif id == math_noad_code then - processsubsup(start) - elseif id == math_box_code or id == hlist_code or id == vlist_code then - -- keep an eye on math_box_code and see what ends up in there - local attr = start[a_tagged] - local last = attr and taglist[attr] - if last and find(last[#last],"formulacaption[:%-]") then - -- leave alone, will nicely move to the outer level - else - local text = start_tagged("mtext") - start[a_tagged] = text - local list = start.list - if not list then - -- empty list - elseif not attr then - -- box comes from strange place - set_attributes(list,a_tagged,text) + local a = getattr(start,a_mathcategory) + if a then + setattr(start,a_tagged,start_tagged(tag,{ mathcategory = a })) else - -- Beware, the first node in list is the actual list so we definitely - -- need to nest. This approach is a hack, maybe I'll make a proper - -- nesting feature to deal with this at another level. Here we just - -- fake structure by enforcing the inner one. - local tagdata = taglist[attr] - local common = #tagdata + 1 - local function runner(list) -- quite inefficient - local cache = { } -- we can have nested unboxed mess so best local to runner - for n in traverse_nodes(list) do - local id = n.id - local aa = n[a_tagged] - if aa then - local ac = cache[aa] - if not ac then - local tagdata = taglist[aa] - local extra = #tagdata - if common <= extra then - for i=common,extra do - ac = restart_tagged(tagdata[i]) -- can be made faster - end - for i=common,extra do - stop_tagged() -- can be made faster + setattr(start,a_tagged,start_tagged(tag)) -- todo: a_mathcategory + end + stop_tagged() + break -- okay? + elseif id == math_textchar_code then -- or id == glyph_code + -- check for code + local a = getattr(start,a_mathcategory) + if a then + setattr(start,a_tagged,start_tagged("ms",{ mathcategory = a })) -- mtext + else + setattr(start,a_tagged,start_tagged("ms")) -- mtext + end + stop_tagged() + break + elseif id == math_delim_code then + -- check for code + setattr(start,a_tagged,start_tagged("mo")) + stop_tagged() + break + elseif id == math_style_code then + -- has a next + elseif id == math_noad_code then + processsubsup(start) + elseif id == math_box_code or id == hlist_code or id == vlist_code then + -- keep an eye on math_box_code and see what ends up in there + local attr = getattr(start,a_tagged) + local specification = taglist[attr] + local tag = specification.tagname + if tag == "formulacaption" then + -- skip + elseif tag == "mstacker" then + local list = getfield(start,"list") + if list then + process(list) + end + else + if tag ~= "mstackertop" and tag ~= "mstackermid" and tag ~= "mstackerbot" then + tag = "mtext" + end + local text = start_tagged(tag) + setattr(start,a_tagged,text) + local list = getfield(start,"list") + if not list then + -- empty list + elseif not attr then + -- box comes from strange place + set_attributes(list,a_tagged,text) -- only the first node ? + else + -- Beware, the first node in list is the actual list so we definitely + -- need to nest. This approach is a hack, maybe I'll make a proper + -- nesting feature to deal with this at another level. Here we just + -- fake structure by enforcing the inner one. + -- + -- todo: have a local list with local tags that then get appended + -- + local tagdata = specification.taglist + local common = #tagdata + 1 + local function runner(list,depth) -- quite inefficient + local cache = { } -- we can have nested unboxed mess so best local to runner + local keep = nil + -- local keep = { } -- win case we might need to move keep outside + for n in traverse_nodes(list) do + local id = getid(n) + local mth = id == math_code and getsubtype(n) + if mth == 0 then + -- insert(keep,text) + keep = text + text = start_tagged("mrow") + common = common + 1 + end + local aa = getattr(n,a_tagged) + if aa then + local ac = cache[aa] + if not ac then + local tagdata = taglist[aa].taglist + local extra = #tagdata + if common <= extra then + for i=common,extra do + ac = restart_tagged(tagdata[i]) -- can be made faster + end + for i=common,extra do + stop_tagged() -- can be made faster + end + else + ac = text end - else - ac = text + cache[aa] = ac end - cache[aa] = ac + setattr(n,a_tagged,ac) + else + setattr(n,a_tagged,text) + end + + if id == hlist_code or id == vlist_code then + runner(getlist(n),depth+1) + elseif id == glyph_code then + runner(getfield(n,"components"),depth+1) -- this should not be needed + elseif id == disc_code then + runner(getfield(n,"pre"),depth+1) -- idem + runner(getfield(n,"post"),depth+1) -- idem + runner(getfield(n,"replace"),depth+1) -- idem + end + if mth == 1 then + stop_tagged() + -- text = remove(keep) + text = keep + common = common - 1 end - n[a_tagged] = ac - else - n[a_tagged] = text end - if id == hlist_code or id == vlist_code then - runner(n.list) + end + runner(list,0) + end + stop_tagged() + end + elseif id == math_sub_code then -- normally a hbox + local list = getfield(start,"list") + if list then + local attr = getattr(start,a_tagged) + local last = attr and taglist[attr] + if last then + local tag = last.tagname + local detail = last.detail + if tag == "maction" then + if detail == "" then + setattr(start,a_tagged,start_tagged("mrow")) + process(list) + stop_tagged() + elseif actionstack[#actionstack] == action then + setattr(start,a_tagged,start_tagged("mrow")) + process(list) + stop_tagged() + else + insert(actionstack,action) + setattr(start,a_tagged,start_tagged("mrow",{ detail = action })) + process(list) + stop_tagged() + remove(actionstack) end + elseif tag == "mstacker" then -- or tag == "mstackertop" or tag == "mstackermid" or tag == "mstackerbot" then + -- looks like it gets processed twice +-- do we still end up here ? + setattr(start,a_tagged,restart_tagged(attr)) -- so we just reuse the attribute + process(list) + stop_tagged() + else + setattr(start,a_tagged,start_tagged("mrow")) + process(list) + stop_tagged() end + else -- never happens, we're always document + setattr(start,a_tagged,start_tagged("mrow")) + process(list) + stop_tagged() end - runner(list) end + elseif id == math_fraction_code then + local num = getfield(start,"num") + local denom = getfield(start,"denom") + local left = getfield(start,"left") + local right = getfield(start,"right") + if left then + setattr(left,a_tagged,start_tagged("mo")) + process(left) + stop_tagged() + end + setattr(start,a_tagged,start_tagged("mfrac")) + process(num) + process(denom) stop_tagged() - end - elseif id == math_sub_code then - local list = start.list - if list then - local attr = start[a_tagged] - local last = attr and taglist[attr] - local action = last and match(last[#last],"maction:(.-)%-") - if action and action ~= "" then - if actionstack[#actionstack] == action then - start[a_tagged] = start_tagged("mrow") - process(list) + if right then + setattr(right,a_tagged,start_tagged("mo")) + process(right) + stop_tagged() + end + elseif id == math_choice_code then + local display = getfield(start,"display") + local text = getfield(start,"text") + local script = getfield(start,"script") + local scriptscript = getfield(start,"scriptscript") + if display then + process(display) + end + if text then + process(text) + end + if script then + process(script) + end + if scriptscript then + process(scriptscript) + end + elseif id == math_fence_code then + local delim = getfield(start,"delim") + local subtype = getfield(start,"subtype") + if subtype == 1 then + -- left + local properties = { } + insert(fencesstack,properties) + setattr(start,a_tagged,start_tagged("mfenced",{ properties = properties })) -- needs checking + if delim then + start_tagged("ignore") + local chr = getfield(delim,"small_char") + if chr ~= 0 then + properties.left = chr + end + process(delim) stop_tagged() - else - insert(actionstack,action) - start[a_tagged] = start_tagged("mrow",{ detail = action }) - process(list) + end + start_tagged("mrow") -- begin of subsequence + elseif subtype == 2 then + -- middle + if delim then + start_tagged("ignore") + local top = fencesstack[#fencesstack] + local chr = getfield(delim,"small_char") + if chr ~= 0 then + local mid = top.middle + if mid then + mid[#mid+1] = chr + else + top.middle = { chr } + end + end + process(delim) stop_tagged() - remove(actionstack) end - else - start[a_tagged] = start_tagged("mrow") - process(list) + stop_tagged() -- end of subsequence + start_tagged("mrow") -- begin of subsequence + elseif subtype == 3 then + local properties = remove(fencesstack) + if not properties then + report_tags("missing right fence") + properties = { } + end + if delim then + start_tagged("ignore") + local chr = getfield(delim,"small_char") + if chr ~= 0 then + properties.right = chr + end + process(delim) + stop_tagged() + end + stop_tagged() -- end of subsequence stop_tagged() + else + -- can't happen end - end - elseif id == math_fraction_code then - local num, denom, left, right = start.num, start.denom, start.left, start.right - if left then - left[a_tagged] = start_tagged("mo") - process(left) - stop_tagged() - end - start[a_tagged] = start_tagged("mfrac") - process(num) - process(denom) - stop_tagged() - if right then - right[a_tagged] = start_tagged("mo") - process(right) - stop_tagged() - end - elseif id == math_choice_code then - local display, text, script, scriptscript = start.display, start.text, start.script, start.scriptscript - if display then - process(display) - end - if text then - process(text) - end - if script then - process(script) - end - if scriptscript then - process(scriptscript) - end - elseif id == math_fence_code then - local delim = start.delim - local subtype = start.subtype - if subtype == 1 then - -- left - start[a_tagged] = start_tagged("mfenced") - if delim then - start[a_tagged] = start_tagged("mleft") - process(delim) + elseif id == math_radical_code then + local left = getfield(start,"left") + local degree = getfield(start,"degree") + if left then + start_tagged("ignore") + process(left) -- root symbol, ignored stop_tagged() end - elseif subtype == 2 then - -- middle - if delim then - start[a_tagged] = start_tagged("mmiddle") - process(delim) + if degree and hascontent(degree) then + setattr(start,a_tagged,start_tagged("mroot")) + processsubsup(start) + process(degree) stop_tagged() - end - elseif subtype == 3 then - if delim then - start[a_tagged] = start_tagged("mright") - process(delim) + else + setattr(start,a_tagged,start_tagged("msqrt")) + processsubsup(start) stop_tagged() end - stop_tagged() - else - -- can't happen - end - elseif id == math_radical_code then - local left, degree = start.left, start.degree - if left then - start_tagged("") - process(left) -- root symbol, ignored - stop_tagged() - end - if degree then -- not good enough, can be empty mlist - start[a_tagged] = start_tagged("mroot") - processsubsup(start) - process(degree) - stop_tagged() - else - start[a_tagged] = start_tagged("msqrt") - processsubsup(start) - stop_tagged() - end - elseif id == math_accent_code then - local accent, bot_accent = start.accent, start.bot_accent - if bot_accent then - if accent then - start[a_tagged] = start_tagged("munderover",{ detail = "accent" }) + elseif id == math_accent_code then + local accent = getfield(start,"accent") + local bot_accent = getfield(start,"bot_accent") + local subtype = getsubtype(start) + if bot_accent then + if accent then + setattr(start,a_tagged,start_tagged("munderover", { + accent = true, + top = getunicode(accent), + bottom = getunicode(bot_accent), + topfixed = subtype == math_fixed_top or subtype == math_fixed_both, + bottomfixed = subtype == math_fixed_bottom or subtype == math_fixed_both, + })) + processsubsup(start) + process(bot_accent) + process(accent) + stop_tagged() + else + setattr(start,a_tagged,start_tagged("munder", { + accent = true, + bottom = getunicode(bot_accent), + bottomfixed = subtype == math_fixed_bottom or subtype == math_fixed_both, + })) + processsubsup(start) + process(bot_accent) + stop_tagged() + end + elseif accent then + setattr(start,a_tagged,start_tagged("mover", { + accent = true, + top = getunicode(accent), + topfixed = subtype == math_fixed_top or subtype == math_fixed_both, + })) processsubsup(start) - process(bot_accent) process(accent) stop_tagged() else - start[a_tagged] = start_tagged("munder",{ detail = "accent" }) processsubsup(start) - process(bot_accent) - stop_tagged() end - elseif accent then - start[a_tagged] = start_tagged("mover",{ detail = "accent" }) - processsubsup(start) - process(accent) + elseif id == glue_code then + -- local spec = getfield(start,"spec") + -- setattr(start,a_tagged,start_tagged("mspace",{ width = getfield(spec,"width") })) + setattr(start,a_tagged,start_tagged("mspace")) stop_tagged() else - processsubsup(start) + setattr(start,a_tagged,start_tagged("merror", { detail = nodecodes[i] })) + stop_tagged() end - elseif id == glue_code then - start[a_tagged] = start_tagged("mspace") - stop_tagged() - else - start[a_tagged] = start_tagged("merror", { detail = nodecodes[i] }) - stop_tagged() end - start = start.next + start = getnext(start) + end + if mtexttag then + stop_tagged() end end function noads.handlers.tags(head,style,penalties) - local v_math = start_tagged("math") - local v_mrow = start_tagged("mrow") - local v_mode = head[a_mathmode] - head[a_tagged] = v_math - head[a_tagged] = v_mrow - tags.setattributehash(v_math,"mode",v_mode == 1 and "display" or "inline") + head = tonut(head) + local v_mode = getattr(head,a_mathmode) + local v_math = start_tagged("math", { mode = v_mode == 1 and "display" or "inline" }) + setattr(head,a_tagged,start_tagged("mrow")) process(head) stop_tagged() stop_tagged() |