diff options
Diffstat (limited to 'tex/context/base/typo-dir.lua')
-rw-r--r-- | tex/context/base/typo-dir.lua | 536 |
1 files changed, 381 insertions, 155 deletions
diff --git a/tex/context/base/typo-dir.lua b/tex/context/base/typo-dir.lua index a04028452..7e5f8c2d3 100644 --- a/tex/context/base/typo-dir.lua +++ b/tex/context/base/typo-dir.lua @@ -6,72 +6,37 @@ if not modules then modules = { } end modules ['typo-dir'] = { license = "see context related readme files" } --- When we started with this, there were some issues in luatex so we needed to take care of --- intereferences. Some has been improved but we stil might end up with each node having a --- dir property. Now, the biggest problem is that there is an official bidi algorithm but --- some searching on the web shows that there are many confusing aspects and therefore --- proposals circulate about (sometimes imcompatible ?) improvements. In the end it all boils --- down to the lack of willingness to tag an input source. Of course tagging of each number --- and fenced strip is somewhat over the top, but now it has to be captured in logic. Texies --- normally have no problem with tagging but we need to handle any input. So, what we have --- done here (over the years) is starting from what we expect to see happen, especially with --- respect to punctation, numbers and fences. Eventually alternative algorithms will be provides --- so that users can choose (the reason why suggestion sfor improvements circulate on the web --- is that it is non trivial to predict the expected behaviour so one hopes that the ditor --- and the rest of the machinery match somehow. Anyway, the fun of tex is that it has no hard --- coded behavior. And ... we also want to have more debugging and extras and ... so we want --- a flexible approach. In the end we will have: --- --- = full tagging (mechanism turned off) --- = half tagging (the current implementation) --- = unicode version x interpretation (several depending on the evolution) +-- todo: also use end_of_math here? 
local next, type = next, type local format, insert, sub, find, match = string.format, table.insert, string.sub, string.find, string.match local utfchar = utf.char -local formatters = string.formatters -local nodes, node = nodes, node - -local trace_textdirections = false trackers.register("typesetters.directions.text", function(v) trace_textdirections = v end) -local trace_mathdirections = false trackers.register("typesetters.directions.math", function(v) trace_mathdirections = v end) -local trace_directions = false trackers.register("typesetters.directions", function(v) trace_textdirections = v trace_mathdirections = v end) - -local report_textdirections = logs.reporter("typesetting","text directions") -local report_mathdirections = logs.reporter("typesetting","math directions") +-- vertical space handler +local nodes, node = nodes, node +local trace_directions = false trackers.register("typesetters.directions", function(v) trace_directions = v end) +local report_directions = logs.reporter("typesetting","directions") local traverse_id = node.traverse_id local insert_node_before = node.insert_before local insert_node_after = node.insert_after local remove_node = nodes.remove -local end_of_math = nodes.end_of_math -local texsetattribute = tex.setattribute -local texsetcount = tex.setcount +local texattribute = tex.attribute local unsetvalue = attributes.unsetvalue -local hasbit = number.hasbit - local nodecodes = nodes.nodecodes local whatcodes = nodes.whatcodes local mathcodes = nodes.mathcodes local tasks = nodes.tasks -local tracers = nodes.tracers -local setcolor = tracers.colors.set -local resetcolor = tracers.colors.reset local glyph_code = nodecodes.glyph local whatsit_code = nodecodes.whatsit local math_code = nodecodes.math -local penalty_code = nodecodes.penalty -local kern_code = nodecodes.kern -local glue_code = nodecodes.glue -local hlist_code = nodecodes.hlist -local vlist_code = nodecodes.vlist local localpar_code = whatcodes.localpar local dir_code = 
whatcodes.dir @@ -80,149 +45,404 @@ local nodepool = nodes.pool local new_textdir = nodepool.textdir +local beginmath_code = mathcodes.beginmath +local endmath_code = mathcodes.endmath + local fonthashes = fonts.hashes local fontdata = fonthashes.identifiers local fontchar = fonthashes.characters -local chardirections = characters.directions -local charmirrors = characters.mirrors -local charclasses = characters.textclasses - -local directions = typesetters.directions or { } -typesetters.directions = directions - -local a_state = attributes.private('state') -local a_directions = attributes.private('directions') -local a_mathbidi = attributes.private('mathbidi') - -local strip = false - -local s_isol = fonts.analyzers.states.isol - -local variables = interfaces.variables -local v_global = variables["global"] -local v_local = variables["local"] -local v_on = variables.on -local v_yes = variables.yes - -local m_enabled = 2^6 -- 64 -local m_global = 2^7 -local m_fences = 2^8 - -local handlers = { } -local methods = { } -local lastmethod = 0 - -local function installhandler(name,handler) - local method = methods[name] - if not method then - lastmethod = lastmethod + 1 - method = lastmethod - methods[name] = method +local chardata = characters.data +local chardirs = characters.directions -- maybe make a special mirror table + +--~ Analysis by Idris: +--~ +--~ 1. Assuming the reading- vs word-order distinction (bidi-char types) is governing; +--~ 2. Assuming that 'ARAB' represents an actual arabic string in raw input order, not word-order; +--~ 3. 
Assuming that 'BARA' represent the correct RL word order; +--~ +--~ Then we have, with input: LATIN ARAB +--~ +--~ \textdir TLT LATIN ARAB => LATIN BARA +--~ \textdir TRT LATIN ARAB => LATIN BARA +--~ \textdir TRT LRO LATIN ARAB => LATIN ARAB +--~ \textdir TLT LRO LATIN ARAB => LATIN ARAB +--~ \textdir TLT RLO LATIN ARAB => NITAL ARAB +--~ \textdir TRT RLO LATIN ARAB => NITAL ARAB + +-- elseif d == "es" then -- European Number Separator +-- elseif d == "et" then -- European Number Terminator +-- elseif d == "cs" then -- Common Number Separator +-- elseif d == "nsm" then -- Non-Spacing Mark +-- elseif d == "bn" then -- Boundary Neutral +-- elseif d == "b" then -- Paragraph Separator +-- elseif d == "s" then -- Segment Separator +-- elseif d == "ws" then -- Whitespace +-- elseif d == "on" then -- Other Neutrals + +typesetters.directions = typesetters.directions or { } +local directions = typesetters.directions + +local a_state = attributes.private('state') +local a_directions = attributes.private('directions') + +local skipmath = true +local strip = false + +-- todo: delayed inserts here +-- todo: get rid of local functions here + +-- beware, math adds whatsits afterwards so that will mess things up + +local finish, autodir, embedded, override, done = nil, 0, 0, 0, false +local list, glyphs = nil, false +local finished, finidir, finipos = nil, nil, 1 +local head, current, inserted = nil, nil, nil + +local function finish_auto_before() + head, inserted = insert_node_before(head,current,new_textdir("-"..finish)) + finished, finidir = inserted, finish + if trace_directions then + insert(list,#list,format("auto finish inserted before: %s",finish)) + finipos = #list-1 end - handlers[method] = handler - return method + finish, autodir, done = nil, 0, true end -directions.handlers = handlers -directions.installhandler = installhandler +local function finish_auto_after() + head, current = insert_node_after(head,current,new_textdir("-"..finish)) + finished, finidir = current, 
finish + if trace_directions then + list[#list+1] = format("auto finish inserted after: %s",finish) + finipos = #list + end + finish, autodir, done = nil, 0, true +end -local function tomode(specification) - local scope = specification.scope - local mode - if scope == v_global or scope == v_on then - mode = m_enabled + m_global - elseif scope == v_local then - mode = m_enabled - else - return 0 +local function force_auto_left_before() + if finish then + finish_auto_before() end - local method = methods[specification.method] - if method then - mode = mode + method + if embedded >= 0 then + finish, autodir, done = "TLT", 1, true else - return 0 + finish, autodir, done = "TRT", -1, true end - if specification.fences == v_yes then - mode = mode + m_fences + if finidir == finish then + head = remove_node(head,finished,true) + if trace_directions then + list[finipos] = list[finipos] .. " (deleted afterwards)" + insert(list,#list,format("start text dir %s (embedded: %s)",finish,embedded)) + end + else + head, inserted = insert_node_before(head,current,new_textdir("+"..finish)) + if trace_directions then + insert(list,#list,format("start text dir %s (embedded: %s)",finish,embedded)) + end end - return mode -end - -local function getglobal(a) - return a and a > 0 and hasbit(a,m_global) -end - -local function getfences(a) - return a and a > 0 and hasbit(a,m_fences) end -local function getmethod(a) - return a and a > 0 and a % m_enabled or 0 -end - -directions.tomode = tomode -directions.getglobal = getglobal -directions.getfences = getfences -directions.getmethod = getmethod -directions.installhandler = installhandler - --- beware: in dha we have character properties and in dua|b we have direction properties - -function directions.setcolor(current,direction,reversed,mirror) - if mirror then - setcolor(current,"bidi:mirrored") - elseif direction == "l" then - setcolor(current,reversed and "bidi:left:reversed" or "bidi:left:original") - elseif direction == "r" then - 
setcolor(current,reversed and "bidi:right:reversed" or "bidi:right:original") +local function force_auto_right_before() + if finish then + finish_auto_before() + end + if embedded <= 0 then + finish, autodir, done = "TRT", -1, true else - resetcolor(current) + finish, autodir, done = "TLT", 1, true + end + if finidir == finish then + head = remove_node(head,finished,true) + if trace_directions then + list[finipos] = list[finipos] .. " (deleted afterwards)" + insert(list,#list,format("start text dir %s (embedded: %s)",finish,embedded)) + end + else + head, inserted = insert_node_before(head,current,new_textdir("+"..finish)) + if trace_directions then + insert(list,#list,format("start text dir %s (embedded: %s)",finish,embedded)) + end end end -function commands.getbidimode(specification) - context(tomode(specification)) -- hash at tex end -end - -local enabled = false +-- todo: use new dir functions -local starttiming = statistics.starttiming -local stoptiming = statistics.stoptiming +local s_isol = fonts.analyzers.states.isol -function directions.handler(head) -- ,_,_,_,direction) -- nodes not nuts | 5th arg is direction - if not head.next then - return head, false +function directions.process(namespace,attribute,start) -- todo: make faster + if not start.next then + return start, false end - local attr = head[a_directions] - if not attr or attr == 0 then - return head, false + head, current, inserted = start, start, nil + finish, autodir, embedded, override, done = nil, 0, 0, 0, false + list, glyphs = trace_directions and { }, false + finished, finidir, finipos = nil, nil, 1 + local stack, top, obsolete = { }, 0, { } + local lro, rlo, prevattr, inmath = false, false, 0, false + while current do + local id = current.id + if skipmath and id == math_code then + local subtype = current.subtype + if subtype == beginmath_code then + inmath = true + elseif subtype == endmath_code then + inmath = false + else + -- todo + end + current = current.next + elseif inmath then + 
current = current.next + else + local attr = current[attribute] + if attr and attr > 0 then + -- current[attribute] = unsetvalue -- slow, needed? + if attr == 1 then + -- bidi parsing mode + elseif attr ~= prevattr then + -- no pop, grouped driven (2=normal,3=lro,4=rlo) + if attr == 3 then + if trace_directions then + list[#list+1] = format("override right -> left (lro) (bidi=%s)",attr) + end + lro, rlo = true, false + elseif attr == 4 then + if trace_directions then + list[#list+1] = format("override left -> right (rlo) (bidi=%s)",attr) + end + lro, rlo = false, true + else + if trace_directions and + current ~= head then list[#list+1] = format("override reset (bidi=%s)",attr) + end + lro, rlo = false, false + end + prevattr = attr + end + end + if id == glyph_code then + glyphs = true + if attr and attr > 0 then + local char = current.char + local d = chardirs[char] + if rlo or override > 0 then + if d == "l" then + if trace_directions then + list[#list+1] = format("char %s (%s / U+%04X) of class %s overidden to r (bidi=%s)",utfchar(char),char,char,d,attr) + end + d = "r" + elseif trace_directions then + if d == "lro" or d == "rlo" or d == "pdf" then -- else side effects on terminal + list[#list+1] = format("override char of class %s (bidi=%s)",d,attr) + else -- todo: rle lre + list[#list+1] = format("char %s (%s / U+%04X) of class %s (bidi=%s)",utfchar(char),char,char,d,attr) + end + end + elseif lro or override < 0 then + if d == "r" or d == "al" then + current[a_state] = s_isol -- maybe better have a special bidi attr value -> override (9) -> todo + if trace_directions then + list[#list+1] = format("char %s (%s / U+%04X) of class %s overidden to l (bidi=%s) (state=isol)",utfchar(char),char,char,d,attr) + end + d = "l" + elseif trace_directions then + if d == "lro" or d == "rlo" or d == "pdf" then -- else side effects on terminal + list[#list+1] = format("override char of class %s (bidi=%s)",d,attr) + else -- todo: rle lre + list[#list+1] = format("char %s (%s 
/ U+%04X) of class %s (bidi=%s)",utfchar(char),char,char,d,attr) + end + end + elseif trace_directions then + if d == "lro" or d == "rlo" or d == "pdf" then -- else side effects on terminal + list[#list+1] = format("override char of class %s (bidi=%s)",d,attr) + else -- todo: rle lre + list[#list+1] = format("char %s (%s / U+%04X) of class %s (bidi=%s)",utfchar(char),char,char,d,attr) + end + end + if d == "on" then + local mirror = chardata[char].mirror -- maybe make a special mirror table + if mirror and fontchar[current.font][mirror] then + -- todo: set attribute + if autodir < 0 then + current.char = mirror + done = true + --~ elseif left or autodir > 0 then + --~ if not is_right(current.prev) then + --~ current.char = mirror + --~ done = true + --~ end + end + end + elseif d == "l" or d == "en" then -- european number + if autodir <= 0 then -- could be option + force_auto_left_before() + end + elseif d == "r" or d == "al" then -- right-to-left, arabic letter + if autodir >= 0 then + force_auto_right_before() + end + elseif d == "an" then -- arabic number + -- actually this is language dependent ... 
+-- if autodir <= 0 then +-- force_auto_left_before() +-- end + if autodir >= 0 then + force_auto_right_before() + end + elseif d == "lro" then -- Left-to-Right Override -> right becomes left + if trace_directions then + list[#list+1] = "override right -> left" + end + top = top + 1 + stack[top] = { override, embedded } + override = -1 + obsolete[#obsolete+1] = current + elseif d == "rlo" then -- Right-to-Left Override -> left becomes right + if trace_directions then + list[#list+1] = "override left -> right" + end + top = top + 1 + stack[top] = { override, embedded } + override = 1 + obsolete[#obsolete+1] = current + elseif d == "lre" then -- Left-to-Right Embedding -> TLT + if trace_directions then + list[#list+1] = "embedding left -> right" + end + top = top + 1 + stack[top] = { override, embedded } + embedded = 1 + obsolete[#obsolete+1] = current + elseif d == "rle" then -- Right-to-Left Embedding -> TRT + if trace_directions then + list[#list+1] = "embedding right -> left" + end + top = top + 1 + stack[top] = { override, embedded } + embedded = -1 -- was 1 + obsolete[#obsolete+1] = current + elseif d == "pdf" then -- Pop Directional Format + -- override = 0 + if top > 0 then + local s = stack[top] + override, embedded = s[1], s[2] + top = top - 1 + if trace_directions then + list[#list+1] = format("state: override: %s, embedded: %s, autodir: %s",override,embedded,autodir) + end + else + if trace_directions then + list[#list+1] = "pop (error, too many pops)" + end + end + obsolete[#obsolete+1] = current + end + elseif trace_directions then + local char = current.char + local d = chardirs[char] + list[#list+1] = format("char %s (%s / U+%04X) of class %s (no bidi)",utfchar(char),char,char,d or "?") + end + elseif id == whatsit_code then + if finish then + finish_auto_before() + end + local subtype = current.subtype + if subtype == localpar_code then + local dir = current.dir + local d = sub(dir,2,2) + if d == 'R' then -- find(dir,".R.") / dir == "TRT" + autodir = 
-1 + else + autodir = 1 + end + -- embedded = autodir + if trace_directions then + list[#list+1] = format("pardir %s",dir) + end + elseif subtype == dir_code then + local dir = current.dir + -- local sign = sub(dir,1,1) + -- local dire = sub(dir,3,3) + local sign, dire = match(dir,"^(.).(.)") + if dire == "R" then + if sign == "+" then + finish, autodir = "TRT", -1 + else + finish, autodir = nil, 0 + end + else + if sign == "+" then + finish, autodir = "TLT", 1 + else + finish, autodir = nil, 0 + end + end + if trace_directions then + list[#list+1] = format("textdir %s",dir) + end + end + else + if trace_directions then + list[#list+1] = format("node %s (subtype %s)",nodecodes[id],current.subtype) + end + if finish then + finish_auto_before() + end + end + local cn = current.next + if not cn then + if finish then + finish_auto_after() + end + end + current = cn + end + end + if trace_directions and glyphs then + report_directions("start log") + for i=1,#list do + report_directions("%02i: %s",i,list[i]) + end + report_directions("stop log") end - local method = getmethod(attr) - local handler = handlers[method] - if not handler then - return head, false + if done and strip then + local n = #obsolete + if n > 0 then + for i=1,n do + head = remove_node(head,obsolete[i],true) + end + report_directions("%s character nodes removed",n) + end end - starttiming(directions) - local head, done = handler(head) - stoptiming(directions) return head, done end -statistics.register("text directions", function() - if enabled then - return statistics.elapsedseconds(directions) - end -end) +--~ local function is_right(n) -- keep ! 
+--~ if n then +--~ local id = n.id +--~ if id == glyph_code then +--~ local attr = n[attribute] +--~ if attr and attr > 0 then +--~ local d = chardirs[n.char] +--~ if d == "r" or d == "al" then -- override +--~ return true +--~ end +--~ end +--~ end +--~ end +--~ return false +--~ end + +--~ function directions.enable() +--~ tasks.enableaction("processors","directions.handler") +--~ end --- function directions.enable() --- tasks.enableaction("processors","directions.handler") --- end +local enabled = false function directions.set(n) -- todo: names and numbers if not enabled then - if trace_textdirections then - report_textdirections("enabling directions handler") + if trace_directions then + report_directions("enabling directions handler") end tasks.enableaction("processors","typesetters.directions.handler") enabled = true @@ -231,7 +451,13 @@ function directions.set(n) -- todo: names and numbers n = unsetvalue -- maybe tracing end - texsetattribute(a_directions,n) + texattribute[a_directions] = n end commands.setdirection = directions.set + +directions.handler = nodes.installattributehandler { + name = "directions", + namespace = directions, + processor = directions.process, +} |