summaryrefslogtreecommitdiff
path: root/tex/context/base/typo-dir.lua
diff options
context:
space:
mode:
Diffstat (limited to 'tex/context/base/typo-dir.lua')
-rw-r--r--tex/context/base/typo-dir.lua536
1 files changed, 381 insertions, 155 deletions
diff --git a/tex/context/base/typo-dir.lua b/tex/context/base/typo-dir.lua
index a04028452..7e5f8c2d3 100644
--- a/tex/context/base/typo-dir.lua
+++ b/tex/context/base/typo-dir.lua
@@ -6,72 +6,37 @@ if not modules then modules = { } end modules ['typo-dir'] = {
license = "see context related readme files"
}
--- When we started with this, there were some issues in luatex so we needed to take care of
--- intereferences. Some has been improved but we stil might end up with each node having a
--- dir property. Now, the biggest problem is that there is an official bidi algorithm but
--- some searching on the web shows that there are many confusing aspects and therefore
--- proposals circulate about (sometimes imcompatible ?) improvements. In the end it all boils
--- down to the lack of willingness to tag an input source. Of course tagging of each number
--- and fenced strip is somewhat over the top, but now it has to be captured in logic. Texies
--- normally have no problem with tagging but we need to handle any input. So, what we have
--- done here (over the years) is starting from what we expect to see happen, especially with
--- respect to punctation, numbers and fences. Eventually alternative algorithms will be provides
--- so that users can choose (the reason why suggestion sfor improvements circulate on the web
--- is that it is non trivial to predict the expected behaviour so one hopes that the ditor
--- and the rest of the machinery match somehow. Anyway, the fun of tex is that it has no hard
--- coded behavior. And ... we also want to have more debugging and extras and ... so we want
--- a flexible approach. In the end we will have:
---
--- = full tagging (mechanism turned off)
--- = half tagging (the current implementation)
--- = unicode version x interpretation (several depending on the evolution)
+-- todo: also use end_of_math here?
local next, type = next, type
local format, insert, sub, find, match = string.format, table.insert, string.sub, string.find, string.match
local utfchar = utf.char
-local formatters = string.formatters
-local nodes, node = nodes, node
-
-local trace_textdirections = false trackers.register("typesetters.directions.text", function(v) trace_textdirections = v end)
-local trace_mathdirections = false trackers.register("typesetters.directions.math", function(v) trace_mathdirections = v end)
-local trace_directions = false trackers.register("typesetters.directions", function(v) trace_textdirections = v trace_mathdirections = v end)
-
-local report_textdirections = logs.reporter("typesetting","text directions")
-local report_mathdirections = logs.reporter("typesetting","math directions")
+-- vertical space handler
+local nodes, node = nodes, node
+local trace_directions = false trackers.register("typesetters.directions", function(v) trace_directions = v end)
+local report_directions = logs.reporter("typesetting","directions")
local traverse_id = node.traverse_id
local insert_node_before = node.insert_before
local insert_node_after = node.insert_after
local remove_node = nodes.remove
-local end_of_math = nodes.end_of_math
-local texsetattribute = tex.setattribute
-local texsetcount = tex.setcount
+local texattribute = tex.attribute
local unsetvalue = attributes.unsetvalue
-local hasbit = number.hasbit
-
local nodecodes = nodes.nodecodes
local whatcodes = nodes.whatcodes
local mathcodes = nodes.mathcodes
local tasks = nodes.tasks
-local tracers = nodes.tracers
-local setcolor = tracers.colors.set
-local resetcolor = tracers.colors.reset
local glyph_code = nodecodes.glyph
local whatsit_code = nodecodes.whatsit
local math_code = nodecodes.math
-local penalty_code = nodecodes.penalty
-local kern_code = nodecodes.kern
-local glue_code = nodecodes.glue
-local hlist_code = nodecodes.hlist
-local vlist_code = nodecodes.vlist
local localpar_code = whatcodes.localpar
local dir_code = whatcodes.dir
@@ -80,149 +45,404 @@ local nodepool = nodes.pool
local new_textdir = nodepool.textdir
+local beginmath_code = mathcodes.beginmath
+local endmath_code = mathcodes.endmath
+
local fonthashes = fonts.hashes
local fontdata = fonthashes.identifiers
local fontchar = fonthashes.characters
-local chardirections = characters.directions
-local charmirrors = characters.mirrors
-local charclasses = characters.textclasses
-
-local directions = typesetters.directions or { }
-typesetters.directions = directions
-
-local a_state = attributes.private('state')
-local a_directions = attributes.private('directions')
-local a_mathbidi = attributes.private('mathbidi')
-
-local strip = false
-
-local s_isol = fonts.analyzers.states.isol
-
-local variables = interfaces.variables
-local v_global = variables["global"]
-local v_local = variables["local"]
-local v_on = variables.on
-local v_yes = variables.yes
-
-local m_enabled = 2^6 -- 64
-local m_global = 2^7
-local m_fences = 2^8
-
-local handlers = { }
-local methods = { }
-local lastmethod = 0
-
-local function installhandler(name,handler)
- local method = methods[name]
- if not method then
- lastmethod = lastmethod + 1
- method = lastmethod
- methods[name] = method
+local chardata = characters.data
+local chardirs = characters.directions -- maybe make a special mirror table
+
+--~ Analysis by Idris:
+--~
+--~ 1. Assuming the reading- vs word-order distinction (bidi-char types) is governing;
+--~ 2. Assuming that 'ARAB' represents an actual arabic string in raw input order, not word-order;
+--~ 3. Assuming that 'BARA' represent the correct RL word order;
+--~
+--~ Then we have, with input: LATIN ARAB
+--~
+--~ \textdir TLT LATIN ARAB => LATIN BARA
+--~ \textdir TRT LATIN ARAB => LATIN BARA
+--~ \textdir TRT LRO LATIN ARAB => LATIN ARAB
+--~ \textdir TLT LRO LATIN ARAB => LATIN ARAB
+--~ \textdir TLT RLO LATIN ARAB => NITAL ARAB
+--~ \textdir TRT RLO LATIN ARAB => NITAL ARAB
+
+-- elseif d == "es" then -- European Number Separator
+-- elseif d == "et" then -- European Number Terminator
+-- elseif d == "cs" then -- Common Number Separator
+-- elseif d == "nsm" then -- Non-Spacing Mark
+-- elseif d == "bn" then -- Boundary Neutral
+-- elseif d == "b" then -- Paragraph Separator
+-- elseif d == "s" then -- Segment Separator
+-- elseif d == "ws" then -- Whitespace
+-- elseif d == "on" then -- Other Neutrals
+
+typesetters.directions = typesetters.directions or { }
+local directions = typesetters.directions
+
+local a_state = attributes.private('state')
+local a_directions = attributes.private('directions')
+
+local skipmath = true
+local strip = false
+
+-- todo: delayed inserts here
+-- todo: get rid of local functions here
+
+-- beware, math adds whatsits afterwards so that will mess things up
+
+local finish, autodir, embedded, override, done = nil, 0, 0, 0, false
+local list, glyphs = nil, false
+local finished, finidir, finipos = nil, nil, 1
+local head, current, inserted = nil, nil, nil
+
+local function finish_auto_before()
+ head, inserted = insert_node_before(head,current,new_textdir("-"..finish))
+ finished, finidir = inserted, finish
+ if trace_directions then
+ insert(list,#list,format("auto finish inserted before: %s",finish))
+ finipos = #list-1
end
- handlers[method] = handler
- return method
+ finish, autodir, done = nil, 0, true
end
-directions.handlers = handlers
-directions.installhandler = installhandler
+local function finish_auto_after()
+ head, current = insert_node_after(head,current,new_textdir("-"..finish))
+ finished, finidir = current, finish
+ if trace_directions then
+ list[#list+1] = format("auto finish inserted after: %s",finish)
+ finipos = #list
+ end
+ finish, autodir, done = nil, 0, true
+end
-local function tomode(specification)
- local scope = specification.scope
- local mode
- if scope == v_global or scope == v_on then
- mode = m_enabled + m_global
- elseif scope == v_local then
- mode = m_enabled
- else
- return 0
+local function force_auto_left_before()
+ if finish then
+ finish_auto_before()
end
- local method = methods[specification.method]
- if method then
- mode = mode + method
+ if embedded >= 0 then
+ finish, autodir, done = "TLT", 1, true
else
- return 0
+ finish, autodir, done = "TRT", -1, true
end
- if specification.fences == v_yes then
- mode = mode + m_fences
+ if finidir == finish then
+ head = remove_node(head,finished,true)
+ if trace_directions then
+ list[finipos] = list[finipos] .. " (deleted afterwards)"
+ insert(list,#list,format("start text dir %s (embedded: %s)",finish,embedded))
+ end
+ else
+ head, inserted = insert_node_before(head,current,new_textdir("+"..finish))
+ if trace_directions then
+ insert(list,#list,format("start text dir %s (embedded: %s)",finish,embedded))
+ end
end
- return mode
-end
-
-local function getglobal(a)
- return a and a > 0 and hasbit(a,m_global)
-end
-
-local function getfences(a)
- return a and a > 0 and hasbit(a,m_fences)
end
-local function getmethod(a)
- return a and a > 0 and a % m_enabled or 0
-end
-
-directions.tomode = tomode
-directions.getglobal = getglobal
-directions.getfences = getfences
-directions.getmethod = getmethod
-directions.installhandler = installhandler
-
--- beware: in dha we have character properties and in dua|b we have direction properties
-
-function directions.setcolor(current,direction,reversed,mirror)
- if mirror then
- setcolor(current,"bidi:mirrored")
- elseif direction == "l" then
- setcolor(current,reversed and "bidi:left:reversed" or "bidi:left:original")
- elseif direction == "r" then
- setcolor(current,reversed and "bidi:right:reversed" or "bidi:right:original")
+local function force_auto_right_before()
+ if finish then
+ finish_auto_before()
+ end
+ if embedded <= 0 then
+ finish, autodir, done = "TRT", -1, true
else
- resetcolor(current)
+ finish, autodir, done = "TLT", 1, true
+ end
+ if finidir == finish then
+ head = remove_node(head,finished,true)
+ if trace_directions then
+ list[finipos] = list[finipos] .. " (deleted afterwards)"
+ insert(list,#list,format("start text dir %s (embedded: %s)",finish,embedded))
+ end
+ else
+ head, inserted = insert_node_before(head,current,new_textdir("+"..finish))
+ if trace_directions then
+ insert(list,#list,format("start text dir %s (embedded: %s)",finish,embedded))
+ end
end
end
-function commands.getbidimode(specification)
- context(tomode(specification)) -- hash at tex end
-end
-
-local enabled = false
+-- todo: use new dir functions
-local starttiming = statistics.starttiming
-local stoptiming = statistics.stoptiming
+local s_isol = fonts.analyzers.states.isol
-function directions.handler(head) -- ,_,_,_,direction) -- nodes not nuts | 5th arg is direction
- if not head.next then
- return head, false
+function directions.process(namespace,attribute,start) -- todo: make faster
+ if not start.next then
+ return start, false
end
- local attr = head[a_directions]
- if not attr or attr == 0 then
- return head, false
+ head, current, inserted = start, start, nil
+ finish, autodir, embedded, override, done = nil, 0, 0, 0, false
+ list, glyphs = trace_directions and { }, false
+ finished, finidir, finipos = nil, nil, 1
+ local stack, top, obsolete = { }, 0, { }
+ local lro, rlo, prevattr, inmath = false, false, 0, false
+ while current do
+ local id = current.id
+ if skipmath and id == math_code then
+ local subtype = current.subtype
+ if subtype == beginmath_code then
+ inmath = true
+ elseif subtype == endmath_code then
+ inmath = false
+ else
+ -- todo
+ end
+ current = current.next
+ elseif inmath then
+ current = current.next
+ else
+ local attr = current[attribute]
+ if attr and attr > 0 then
+ -- current[attribute] = unsetvalue -- slow, needed?
+ if attr == 1 then
+ -- bidi parsing mode
+ elseif attr ~= prevattr then
+ -- no pop, grouped driven (2=normal,3=lro,4=rlo)
+ if attr == 3 then
+ if trace_directions then
+ list[#list+1] = format("override right -> left (lro) (bidi=%s)",attr)
+ end
+ lro, rlo = true, false
+ elseif attr == 4 then
+ if trace_directions then
+ list[#list+1] = format("override left -> right (rlo) (bidi=%s)",attr)
+ end
+ lro, rlo = false, true
+ else
+ if trace_directions and
+ current ~= head then list[#list+1] = format("override reset (bidi=%s)",attr)
+ end
+ lro, rlo = false, false
+ end
+ prevattr = attr
+ end
+ end
+ if id == glyph_code then
+ glyphs = true
+ if attr and attr > 0 then
+ local char = current.char
+ local d = chardirs[char]
+ if rlo or override > 0 then
+ if d == "l" then
+ if trace_directions then
+ list[#list+1] = format("char %s (%s / U+%04X) of class %s overidden to r (bidi=%s)",utfchar(char),char,char,d,attr)
+ end
+ d = "r"
+ elseif trace_directions then
+ if d == "lro" or d == "rlo" or d == "pdf" then -- else side effects on terminal
+ list[#list+1] = format("override char of class %s (bidi=%s)",d,attr)
+ else -- todo: rle lre
+ list[#list+1] = format("char %s (%s / U+%04X) of class %s (bidi=%s)",utfchar(char),char,char,d,attr)
+ end
+ end
+ elseif lro or override < 0 then
+ if d == "r" or d == "al" then
+ current[a_state] = s_isol -- maybe better have a special bidi attr value -> override (9) -> todo
+ if trace_directions then
+ list[#list+1] = format("char %s (%s / U+%04X) of class %s overidden to l (bidi=%s) (state=isol)",utfchar(char),char,char,d,attr)
+ end
+ d = "l"
+ elseif trace_directions then
+ if d == "lro" or d == "rlo" or d == "pdf" then -- else side effects on terminal
+ list[#list+1] = format("override char of class %s (bidi=%s)",d,attr)
+ else -- todo: rle lre
+ list[#list+1] = format("char %s (%s / U+%04X) of class %s (bidi=%s)",utfchar(char),char,char,d,attr)
+ end
+ end
+ elseif trace_directions then
+ if d == "lro" or d == "rlo" or d == "pdf" then -- else side effects on terminal
+ list[#list+1] = format("override char of class %s (bidi=%s)",d,attr)
+ else -- todo: rle lre
+ list[#list+1] = format("char %s (%s / U+%04X) of class %s (bidi=%s)",utfchar(char),char,char,d,attr)
+ end
+ end
+ if d == "on" then
+ local mirror = chardata[char].mirror -- maybe make a special mirror table
+ if mirror and fontchar[current.font][mirror] then
+ -- todo: set attribute
+ if autodir < 0 then
+ current.char = mirror
+ done = true
+ --~ elseif left or autodir > 0 then
+ --~ if not is_right(current.prev) then
+ --~ current.char = mirror
+ --~ done = true
+ --~ end
+ end
+ end
+ elseif d == "l" or d == "en" then -- european number
+ if autodir <= 0 then -- could be option
+ force_auto_left_before()
+ end
+ elseif d == "r" or d == "al" then -- arabic number
+ if autodir >= 0 then
+ force_auto_right_before()
+ end
+ elseif d == "an" then -- arabic number
+ -- actually this is language dependent ...
+-- if autodir <= 0 then
+-- force_auto_left_before()
+-- end
+ if autodir >= 0 then
+ force_auto_right_before()
+ end
+ elseif d == "lro" then -- Left-to-Right Override -> right becomes left
+ if trace_directions then
+ list[#list+1] = "override right -> left"
+ end
+ top = top + 1
+ stack[top] = { override, embedded }
+ override = -1
+ obsolete[#obsolete+1] = current
+ elseif d == "rlo" then -- Right-to-Left Override -> left becomes right
+ if trace_directions then
+ list[#list+1] = "override left -> right"
+ end
+ top = top + 1
+ stack[top] = { override, embedded }
+ override = 1
+ obsolete[#obsolete+1] = current
+ elseif d == "lre" then -- Left-to-Right Embedding -> TLT
+ if trace_directions then
+ list[#list+1] = "embedding left -> right"
+ end
+ top = top + 1
+ stack[top] = { override, embedded }
+ embedded = 1
+ obsolete[#obsolete+1] = current
+ elseif d == "rle" then -- Right-to-Left Embedding -> TRT
+ if trace_directions then
+ list[#list+1] = "embedding right -> left"
+ end
+ top = top + 1
+ stack[top] = { override, embedded }
+ embedded = -1 -- was 1
+ obsolete[#obsolete+1] = current
+ elseif d == "pdf" then -- Pop Directional Format
+ -- override = 0
+ if top > 0 then
+ local s = stack[top]
+ override, embedded = s[1], s[2]
+ top = top - 1
+ if trace_directions then
+ list[#list+1] = format("state: override: %s, embedded: %s, autodir: %s",override,embedded,autodir)
+ end
+ else
+ if trace_directions then
+ list[#list+1] = "pop (error, too many pops)"
+ end
+ end
+ obsolete[#obsolete+1] = current
+ end
+ elseif trace_directions then
+ local char = current.char
+ local d = chardirs[char]
+ list[#list+1] = format("char %s (%s / U+%04X) of class %s (no bidi)",utfchar(char),char,char,d or "?")
+ end
+ elseif id == whatsit_code then
+ if finish then
+ finish_auto_before()
+ end
+ local subtype = current.subtype
+ if subtype == localpar_code then
+ local dir = current.dir
+ local d = sub(dir,2,2)
+ if d == 'R' then -- find(dir,".R.") / dir == "TRT"
+ autodir = -1
+ else
+ autodir = 1
+ end
+ -- embedded = autodir
+ if trace_directions then
+ list[#list+1] = format("pardir %s",dir)
+ end
+ elseif subtype == dir_code then
+ local dir = current.dir
+ -- local sign = sub(dir,1,1)
+ -- local dire = sub(dir,3,3)
+ local sign, dire = match(dir,"^(.).(.)")
+ if dire == "R" then
+ if sign == "+" then
+ finish, autodir = "TRT", -1
+ else
+ finish, autodir = nil, 0
+ end
+ else
+ if sign == "+" then
+ finish, autodir = "TLT", 1
+ else
+ finish, autodir = nil, 0
+ end
+ end
+ if trace_directions then
+ list[#list+1] = format("textdir %s",dir)
+ end
+ end
+ else
+ if trace_directions then
+ list[#list+1] = format("node %s (subtype %s)",nodecodes[id],current.subtype)
+ end
+ if finish then
+ finish_auto_before()
+ end
+ end
+ local cn = current.next
+ if not cn then
+ if finish then
+ finish_auto_after()
+ end
+ end
+ current = cn
+ end
+ end
+ if trace_directions and glyphs then
+ report_directions("start log")
+ for i=1,#list do
+ report_directions("%02i: %s",i,list[i])
+ end
+ report_directions("stop log")
end
- local method = getmethod(attr)
- local handler = handlers[method]
- if not handler then
- return head, false
+ if done and strip then
+ local n = #obsolete
+ if n > 0 then
+ for i=1,n do
+ remove_node(head,obsolete[i],true)
+ end
+ report_directions("%s character nodes removed",n)
+ end
end
- starttiming(directions)
- local head, done = handler(head)
- stoptiming(directions)
return head, done
end
-statistics.register("text directions", function()
- if enabled then
- return statistics.elapsedseconds(directions)
- end
-end)
+--~ local function is_right(n) -- keep !
+--~ if n then
+--~ local id = n.id
+--~ if id == glyph_code then
+--~ local attr = n[attribute]
+--~ if attr and attr > 0 then
+--~ local d = chardirs[n.char]
+--~ if d == "r" or d == "al" then -- override
+--~ return true
+--~ end
+--~ end
+--~ end
+--~ end
+--~ return false
+--~ end
+
+--~ function directions.enable()
+--~ tasks.enableaction("processors","directions.handler")
+--~ end
--- function directions.enable()
--- tasks.enableaction("processors","directions.handler")
--- end
+local enabled = false
function directions.set(n) -- todo: names and numbers
if not enabled then
- if trace_textdirections then
- report_textdirections("enabling directions handler")
+ if trace_directions then
+ report_breakpoints("enabling directions handler")
end
tasks.enableaction("processors","typesetters.directions.handler")
enabled = true
@@ -231,7 +451,13 @@ function directions.set(n) -- todo: names and numbers
n = unsetvalue
-- maybe tracing
end
- texsetattribute(a_directions,n)
+ texattribute[a_directions] = n
end
commands.setdirection = directions.set
+
+directions.handler = nodes.installattributehandler {
+ name = "directions",
+ namespace = directions,
+ processor = directions.process,
+}