summaryrefslogtreecommitdiff
path: root/tex/context/base/mkiv/math-tag.lua
diff options
context:
space:
mode:
Diffstat (limited to 'tex/context/base/mkiv/math-tag.lua')
-rw-r--r--tex/context/base/mkiv/math-tag.lua555
1 files changed, 555 insertions, 0 deletions
diff --git a/tex/context/base/mkiv/math-tag.lua b/tex/context/base/mkiv/math-tag.lua
new file mode 100644
index 000000000..e83b401fb
--- /dev/null
+++ b/tex/context/base/mkiv/math-tag.lua
@@ -0,0 +1,555 @@
+if not modules then modules = { } end modules ['math-tag'] = {
+ version = 1.001,
+ comment = "companion to math-ini.mkiv",
+ author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
+ copyright = "PRAGMA ADE / ConTeXt Development Team",
+ license = "see context related readme files"
+}
+
+-- todo: have a local list with local tags that then get appended
+-- todo: use tex.getmathcodes (no table)
+
+-- use lpeg matchers
+
+local find, match = string.find, string.match
+local insert, remove, concat = table.insert, table.remove, table.concat
+
+local attributes = attributes
+local nodes = nodes
+
+local nuts = nodes.nuts
+local tonut = nuts.tonut
+
+local getnext = nuts.getnext
+local getid = nuts.getid
+local getchar = nuts.getchar
+local getfont = nuts.getfont
+local getlist = nuts.getlist
+local getfield = nuts.getfield
+local getdisc = nuts.getdisc
+local getsubtype = nuts.getsubtype
+local getattr = nuts.getattr
+local setattr = nuts.setattr
+
+local set_attributes = nuts.setattributes
+local traverse_nodes = nuts.traverse
+
+local nodecodes = nodes.nodecodes
+
+local math_noad_code = nodecodes.noad -- attr nucleus sub sup
+local math_accent_code = nodecodes.accent -- attr nucleus sub sup accent
+local math_radical_code = nodecodes.radical -- attr nucleus sub sup left degree
+local math_fraction_code = nodecodes.fraction -- attr nucleus sub sup left right
+local math_box_code = nodecodes.subbox -- attr list
+local math_sub_code = nodecodes.submlist -- attr list
+local math_char_code = nodecodes.mathchar -- attr fam char
+local math_textchar_code = nodecodes.mathtextchar -- attr fam char
+local math_delim_code = nodecodes.delim -- attr small_fam small_char large_fam large_char
+local math_style_code = nodecodes.style -- attr style
+local math_choice_code = nodecodes.choice -- attr display text script scriptscript
+local math_fence_code = nodecodes.fence -- attr subtype
+
+local accentcodes = nodes.accentcodes
+
+local math_fixed_top = accentcodes.fixedtop
+local math_fixed_bottom = accentcodes.fixedbottom
+local math_fixed_both = accentcodes.fixedboth
+
+local kerncodes = nodes.kerncodes
+
+local fontkern_code = kerncodes.fontkern
+
+local hlist_code = nodecodes.hlist
+local vlist_code = nodecodes.vlist
+local glyph_code = nodecodes.glyph
+local disc_code = nodecodes.disc
+local glue_code = nodecodes.glue
+local kern_code = nodecodes.kern
+local math_code = nodecodes.math
+
+local processnoads = noads.process
+
+local a_tagged = attributes.private('tagged')
+local a_taggedpar = attributes.private('taggedpar')
+local a_exportstatus = attributes.private('exportstatus')
+local a_mathcategory = attributes.private('mathcategory')
+local a_mathmode = attributes.private('mathmode')
+local a_fontkern = attributes.private('fontkern')
+
+local tags = structures.tags
+
+local start_tagged = tags.start
+local restart_tagged = tags.restart
+local stop_tagged = tags.stop
+local taglist = tags.taglist
+
+local chardata = characters.data
+
+local getmathcode = tex.getmathcode
+local mathcodes = mathematics.codes
+local ordinary_code = mathcodes.ordinary
+local variable_code = mathcodes.variable
+
+local fromunicode16 = fonts.mappings.fromunicode16
+local fontcharacters = fonts.hashes.characters
+
+local report_tags = logs.reporter("structure","tags")
+
+local process
+
+local function processsubsup(start)
+ -- At some point we might need to add an attribute signaling the
+ -- super- and subscripts because TeX and MathML use a different
+ -- order. The mrows are needed to keep mn's separated.
+ local nucleus = getfield(start,"nucleus")
+ local sup = getfield(start,"sup")
+ local sub = getfield(start,"sub")
+ if sub then
+ if sup then
+ setattr(start,a_tagged,start_tagged("msubsup"))
+ -- start_tagged("mrow")
+ process(nucleus)
+ -- stop_tagged()
+ start_tagged("mrow")
+ process(sub)
+ stop_tagged()
+ start_tagged("mrow")
+ process(sup)
+ stop_tagged()
+ stop_tagged()
+ else
+ setattr(start,a_tagged,start_tagged("msub"))
+ -- start_tagged("mrow")
+ process(nucleus)
+ -- stop_tagged()
+ start_tagged("mrow")
+ process(sub)
+ stop_tagged()
+ stop_tagged()
+ end
+ elseif sup then
+ setattr(start,a_tagged,start_tagged("msup"))
+ -- start_tagged("mrow")
+ process(nucleus)
+ -- stop_tagged()
+ start_tagged("mrow")
+ process(sup)
+ stop_tagged()
+ stop_tagged()
+ else
+ process(nucleus)
+ end
+end
+
+-- todo: check function here and keep attribute the same
+
+-- todo: variants -> original
+
+local actionstack = { }
+local fencesstack = { }
+
+-- glyph nodes and such can happen in under and over stuff
+
+local function getunicode(n) -- instead of getchar
+ local char = getchar(n)
+ -- local font = font_of_family(getfield(n,"fam")) -- font_of_family
+ local font = getfont(n)
+ local data = fontcharacters[font][char]
+ return data.unicode or char
+end
+
+-------------------
+
+local content = { }
+local found = false
+
+content[math_char_code] = function() found = true end
+
+local function hascontent(head)
+ found = false
+ processnoads(head,content,"content")
+ return found
+end
+
+--------------------
+
+local function showtag(n,id)
+ local attr = getattr(n,a_tagged)
+ report_tags("%s = %s",nodecodes[id or getid(n)],attr and taglist[attr].tagname or "?")
+end
+
+process = function(start) -- we cannot use the processor as we have no finalizers (yet)
+ local mtexttag = nil
+ while start do
+ local id = getid(start)
+
+-- showtag(start,id)
+
+ if id == glyph_code or id == disc_code then
+ if not mtexttag then
+ mtexttag = start_tagged("mtext")
+ end
+ setattr(start,a_tagged,mtexttag)
+ elseif mtexttag and id == kern_code and (getsubtype(start) == fontkern_code or getattr(start,a_fontkern)) then
+ setattr(start,a_tagged,mtexttag)
+ else
+ if mtexttag then
+ stop_tagged()
+ mtexttag = nil
+ end
+ if id == math_char_code then
+ local char = getchar(start)
+ local code = getmathcode(char)
+ if code then
+ code = code[1]
+ end
+ local tag
+ if code == ordinary_code or code == variable_code then
+ local ch = chardata[char]
+ local mc = ch and ch.mathclass
+ if mc == "number" then
+ tag = "mn"
+ elseif mc == "variable" or not mc then -- variable is default
+ tag = "mi"
+ else
+ tag = "mo"
+ end
+ else
+ tag = "mo"
+ end
+ local a = getattr(start,a_mathcategory)
+ if a then
+ setattr(start,a_tagged,start_tagged(tag,{ mathcategory = a }))
+ else
+ setattr(start,a_tagged,start_tagged(tag)) -- todo: a_mathcategory
+ end
+ stop_tagged()
+ break -- okay?
+ elseif id == math_textchar_code then -- or id == glyph_code
+ -- check for code
+ local a = getattr(start,a_mathcategory)
+ if a then
+ setattr(start,a_tagged,start_tagged("ms",{ mathcategory = a })) -- mtext
+ else
+ setattr(start,a_tagged,start_tagged("ms")) -- mtext
+ end
+ stop_tagged()
+ break
+ elseif id == math_delim_code then
+ -- check for code
+ setattr(start,a_tagged,start_tagged("mo"))
+ stop_tagged()
+ break
+ elseif id == math_style_code then
+ -- has a next
+ elseif id == math_noad_code then
+ processsubsup(start)
+ elseif id == math_box_code or id == hlist_code or id == vlist_code then
+ -- keep an eye on math_box_code and see what ends up in there
+ local attr = getattr(start,a_tagged)
+ local specification = taglist[attr]
+ local tag = specification.tagname
+ if tag == "formulacaption" then
+ -- skip
+ elseif tag == "mstacker" then
+ local list = getfield(start,"list")
+ if list then
+ process(list)
+ end
+ else
+ if tag ~= "mstackertop" and tag ~= "mstackermid" and tag ~= "mstackerbot" then
+ tag = "mtext"
+ end
+ local text = start_tagged(tag)
+ setattr(start,a_tagged,text)
+ local list = getfield(start,"list")
+ if not list then
+ -- empty list
+ elseif not attr then
+ -- box comes from strange place
+ set_attributes(list,a_tagged,text) -- only the first node ?
+ else
+ -- Beware, the first node in list is the actual list so we definitely
+ -- need to nest. This approach is a hack, maybe I'll make a proper
+ -- nesting feature to deal with this at another level. Here we just
+ -- fake structure by enforcing the inner one.
+ --
+ -- todo: have a local list with local tags that then get appended
+ --
+ local tagdata = specification.taglist
+ local common = #tagdata + 1
+ local function runner(list,depth) -- quite inefficient
+ local cache = { } -- we can have nested unboxed mess so best local to runner
+ local keep = nil
+ -- local keep = { } -- win case we might need to move keep outside
+ for n in traverse_nodes(list) do
+ local id = getid(n)
+ local mth = id == math_code and getsubtype(n)
+ if mth == 0 then
+ -- insert(keep,text)
+ keep = text
+ text = start_tagged("mrow")
+ common = common + 1
+ end
+ local aa = getattr(n,a_tagged)
+ if aa then
+ local ac = cache[aa]
+ if not ac then
+ local tagdata = taglist[aa].taglist
+ local extra = #tagdata
+ if common <= extra then
+ for i=common,extra do
+ ac = restart_tagged(tagdata[i]) -- can be made faster
+ end
+ for i=common,extra do
+ stop_tagged() -- can be made faster
+ end
+ else
+ ac = text
+ end
+ cache[aa] = ac
+ end
+ setattr(n,a_tagged,ac)
+ else
+ setattr(n,a_tagged,text)
+ end
+
+ if id == hlist_code or id == vlist_code then
+ runner(getlist(n),depth+1)
+ elseif id == glyph_code then
+ runner(getfield(n,"components"),depth+1) -- this should not be needed
+ elseif id == disc_code then
+ local pre, post, replace = getdisc(n)
+ runner(pre,depth+1) -- idem
+ runner(post,depth+1) -- idem
+ runner(replace,depth+1) -- idem
+ end
+ if mth == 1 then
+ stop_tagged()
+ -- text = remove(keep)
+ text = keep
+ common = common - 1
+ end
+ end
+ end
+ runner(list,0)
+ end
+ stop_tagged()
+ end
+ elseif id == math_sub_code then -- normally a hbox
+ local list = getfield(start,"list")
+ if list then
+ local attr = getattr(start,a_tagged)
+ local last = attr and taglist[attr]
+ if last then
+ local tag = last.tagname
+ local detail = last.detail
+ if tag == "maction" then
+ if detail == "" then
+ setattr(start,a_tagged,start_tagged("mrow"))
+ process(list)
+ stop_tagged()
+ elseif actionstack[#actionstack] == action then
+ setattr(start,a_tagged,start_tagged("mrow"))
+ process(list)
+ stop_tagged()
+ else
+ insert(actionstack,action)
+ setattr(start,a_tagged,start_tagged("mrow",{ detail = action }))
+ process(list)
+ stop_tagged()
+ remove(actionstack)
+ end
+ elseif tag == "mstacker" then -- or tag == "mstackertop" or tag == "mstackermid" or tag == "mstackerbot" then
+ -- looks like it gets processed twice
+-- do we still end up here ?
+ setattr(start,a_tagged,restart_tagged(attr)) -- so we just reuse the attribute
+ process(list)
+ stop_tagged()
+ else
+ setattr(start,a_tagged,start_tagged("mrow"))
+ process(list)
+ stop_tagged()
+ end
+ else -- never happens, we're always document
+ setattr(start,a_tagged,start_tagged("mrow"))
+ process(list)
+ stop_tagged()
+ end
+ end
+ elseif id == math_fraction_code then
+ local num = getfield(start,"num")
+ local denom = getfield(start,"denom")
+ local left = getfield(start,"left")
+ local right = getfield(start,"right")
+ if left then
+ setattr(left,a_tagged,start_tagged("mo"))
+ process(left)
+ stop_tagged()
+ end
+ setattr(start,a_tagged,start_tagged("mfrac"))
+ process(num)
+ process(denom)
+ stop_tagged()
+ if right then
+ setattr(right,a_tagged,start_tagged("mo"))
+ process(right)
+ stop_tagged()
+ end
+ elseif id == math_choice_code then
+ local display = getfield(start,"display")
+ local text = getfield(start,"text")
+ local script = getfield(start,"script")
+ local scriptscript = getfield(start,"scriptscript")
+ if display then
+ process(display)
+ end
+ if text then
+ process(text)
+ end
+ if script then
+ process(script)
+ end
+ if scriptscript then
+ process(scriptscript)
+ end
+ elseif id == math_fence_code then
+ local delim = getfield(start,"delim")
+ local subtype = getfield(start,"subtype")
+ if subtype == 1 then
+ -- left
+ local properties = { }
+ insert(fencesstack,properties)
+ setattr(start,a_tagged,start_tagged("mfenced",{ properties = properties })) -- needs checking
+ if delim then
+ start_tagged("ignore")
+ local chr = getfield(delim,"small_char")
+ if chr ~= 0 then
+ properties.left = chr
+ end
+ process(delim)
+ stop_tagged()
+ end
+ start_tagged("mrow") -- begin of subsequence
+ elseif subtype == 2 then
+ -- middle
+ if delim then
+ start_tagged("ignore")
+ local top = fencesstack[#fencesstack]
+ local chr = getfield(delim,"small_char")
+ if chr ~= 0 then
+ local mid = top.middle
+ if mid then
+ mid[#mid+1] = chr
+ else
+ top.middle = { chr }
+ end
+ end
+ process(delim)
+ stop_tagged()
+ end
+ stop_tagged() -- end of subsequence
+ start_tagged("mrow") -- begin of subsequence
+ elseif subtype == 3 then
+ local properties = remove(fencesstack)
+ if not properties then
+ report_tags("missing right fence")
+ properties = { }
+ end
+ if delim then
+ start_tagged("ignore")
+ local chr = getfield(delim,"small_char")
+ if chr ~= 0 then
+ properties.right = chr
+ end
+ process(delim)
+ stop_tagged()
+ end
+ stop_tagged() -- end of subsequence
+ stop_tagged()
+ else
+ -- can't happen
+ end
+ elseif id == math_radical_code then
+ local left = getfield(start,"left")
+ local degree = getfield(start,"degree")
+ if left then
+ start_tagged("ignore")
+ process(left) -- root symbol, ignored
+ stop_tagged()
+ end
+ if degree and hascontent(degree) then
+ setattr(start,a_tagged,start_tagged("mroot"))
+ processsubsup(start)
+ process(degree)
+ stop_tagged()
+ else
+ setattr(start,a_tagged,start_tagged("msqrt"))
+ processsubsup(start)
+ stop_tagged()
+ end
+ elseif id == math_accent_code then
+ local accent = getfield(start,"accent")
+ local bot_accent = getfield(start,"bot_accent")
+ local subtype = getsubtype(start)
+ if bot_accent then
+ if accent then
+ setattr(start,a_tagged,start_tagged("munderover", {
+ accent = true,
+ top = getunicode(accent),
+ bottom = getunicode(bot_accent),
+ topfixed = subtype == math_fixed_top or subtype == math_fixed_both,
+ bottomfixed = subtype == math_fixed_bottom or subtype == math_fixed_both,
+ }))
+ processsubsup(start)
+ process(bot_accent)
+ process(accent)
+ stop_tagged()
+ else
+ setattr(start,a_tagged,start_tagged("munder", {
+ accent = true,
+ bottom = getunicode(bot_accent),
+ bottomfixed = subtype == math_fixed_bottom or subtype == math_fixed_both,
+ }))
+ processsubsup(start)
+ process(bot_accent)
+ stop_tagged()
+ end
+ elseif accent then
+ setattr(start,a_tagged,start_tagged("mover", {
+ accent = true,
+ top = getunicode(accent),
+ topfixed = subtype == math_fixed_top or subtype == math_fixed_both,
+ }))
+ processsubsup(start)
+ process(accent)
+ stop_tagged()
+ else
+ processsubsup(start)
+ end
+ elseif id == glue_code then
+ -- setattr(start,a_tagged,start_tagged("mspace",{ width = getfield(start,"width") }))
+ setattr(start,a_tagged,start_tagged("mspace"))
+ stop_tagged()
+ else
+ setattr(start,a_tagged,start_tagged("merror", { detail = nodecodes[i] }))
+ stop_tagged()
+ end
+ end
+ start = getnext(start)
+ end
+ if mtexttag then
+ stop_tagged()
+ end
+end
+
+function noads.handlers.tags(head,style,penalties)
+ head = tonut(head)
+ local v_mode = getattr(head,a_mathmode)
+ local v_math = start_tagged("math", { mode = v_mode == 1 and "display" or "inline" })
+ setattr(head,a_tagged,start_tagged("mrow"))
+ process(head)
+ stop_tagged()
+ stop_tagged()
+ return true
+end