summaryrefslogtreecommitdiff
path: root/tex/context/base/typo-dir.lua
diff options
context:
space:
mode:
authorMarius <mariausol@gmail.com>2013-10-20 01:21:09 +0300
committerMarius <mariausol@gmail.com>2013-10-20 01:21:09 +0300
commitb8ac6d7b7fdb16293c28034c349efd5b0b7b20b3 (patch)
tree0e9051dbe21b4e9cfc72fe594df5b0fe7bc511f3 /tex/context/base/typo-dir.lua
parent965214d981e6129b782c67adcaf3a81aedcb0bac (diff)
downloadcontext-b8ac6d7b7fdb16293c28034c349efd5b0b7b20b3.tar.gz
beta 2013.10.20 07:09
Diffstat (limited to 'tex/context/base/typo-dir.lua')
-rw-r--r--tex/context/base/typo-dir.lua536
1 files changed, 155 insertions, 381 deletions
diff --git a/tex/context/base/typo-dir.lua b/tex/context/base/typo-dir.lua
index 7e5f8c2d3..a04028452 100644
--- a/tex/context/base/typo-dir.lua
+++ b/tex/context/base/typo-dir.lua
@@ -6,37 +6,72 @@ if not modules then modules = { } end modules ['typo-dir'] = {
license = "see context related readme files"
}
--- todo: also use end_of_math here?
+-- When we started with this, there were some issues in luatex so we needed to take care of
+-- interferences. Some things have been improved but we still might end up with each node having a
+-- dir property. Now, the biggest problem is that there is an official bidi algorithm but
+-- some searching on the web shows that there are many confusing aspects and therefore
+-- proposals circulate about (sometimes incompatible?) improvements. In the end it all boils
+-- down to the lack of willingness to tag an input source. Of course tagging of each number
+-- and fenced strip is somewhat over the top, but now it has to be captured in logic. Texies
+-- normally have no problem with tagging but we need to handle any input. So, what we have
+-- done here (over the years) is starting from what we expect to see happen, especially with
+-- respect to punctuation, numbers and fences. Eventually alternative algorithms will be provided
+-- so that users can choose (the reason why suggestions for improvements circulate on the web
+-- is that it is non trivial to predict the expected behaviour so one hopes that the editor
+-- and the rest of the machinery match somehow). Anyway, the fun of tex is that it has no hard
+-- coded behavior. And ... we also want to have more debugging and extras and ... so we want
+-- a flexible approach. In the end we will have:
+--
+-- = full tagging (mechanism turned off)
+-- = half tagging (the current implementation)
+-- = unicode version x interpretation (several depending on the evolution)
local next, type = next, type
local format, insert, sub, find, match = string.format, table.insert, string.sub, string.find, string.match
local utfchar = utf.char
-
--- vertical space handler
+local formatters = string.formatters
local nodes, node = nodes, node
-local trace_directions = false trackers.register("typesetters.directions", function(v) trace_directions = v end)
+local trace_textdirections = false trackers.register("typesetters.directions.text", function(v) trace_textdirections = v end)
+local trace_mathdirections = false trackers.register("typesetters.directions.math", function(v) trace_mathdirections = v end)
+local trace_directions = false trackers.register("typesetters.directions", function(v) trace_textdirections = v trace_mathdirections = v end)
+
+local report_textdirections = logs.reporter("typesetting","text directions")
+local report_mathdirections = logs.reporter("typesetting","math directions")
+
+
-local report_directions = logs.reporter("typesetting","directions")
local traverse_id = node.traverse_id
local insert_node_before = node.insert_before
local insert_node_after = node.insert_after
local remove_node = nodes.remove
+local end_of_math = nodes.end_of_math
-local texattribute = tex.attribute
+local texsetattribute = tex.setattribute
+local texsetcount = tex.setcount
local unsetvalue = attributes.unsetvalue
+local hasbit = number.hasbit
+
local nodecodes = nodes.nodecodes
local whatcodes = nodes.whatcodes
local mathcodes = nodes.mathcodes
local tasks = nodes.tasks
+local tracers = nodes.tracers
+local setcolor = tracers.colors.set
+local resetcolor = tracers.colors.reset
local glyph_code = nodecodes.glyph
local whatsit_code = nodecodes.whatsit
local math_code = nodecodes.math
+local penalty_code = nodecodes.penalty
+local kern_code = nodecodes.kern
+local glue_code = nodecodes.glue
+local hlist_code = nodecodes.hlist
+local vlist_code = nodecodes.vlist
local localpar_code = whatcodes.localpar
local dir_code = whatcodes.dir
@@ -45,404 +80,149 @@ local nodepool = nodes.pool
local new_textdir = nodepool.textdir
-local beginmath_code = mathcodes.beginmath
-local endmath_code = mathcodes.endmath
-
local fonthashes = fonts.hashes
local fontdata = fonthashes.identifiers
local fontchar = fonthashes.characters
-local chardata = characters.data
-local chardirs = characters.directions -- maybe make a special mirror table
-
---~ Analysis by Idris:
---~
---~ 1. Assuming the reading- vs word-order distinction (bidi-char types) is governing;
---~ 2. Assuming that 'ARAB' represents an actual arabic string in raw input order, not word-order;
---~ 3. Assuming that 'BARA' represent the correct RL word order;
---~
---~ Then we have, with input: LATIN ARAB
---~
---~ \textdir TLT LATIN ARAB => LATIN BARA
---~ \textdir TRT LATIN ARAB => LATIN BARA
---~ \textdir TRT LRO LATIN ARAB => LATIN ARAB
---~ \textdir TLT LRO LATIN ARAB => LATIN ARAB
---~ \textdir TLT RLO LATIN ARAB => NITAL ARAB
---~ \textdir TRT RLO LATIN ARAB => NITAL ARAB
-
--- elseif d == "es" then -- European Number Separator
--- elseif d == "et" then -- European Number Terminator
--- elseif d == "cs" then -- Common Number Separator
--- elseif d == "nsm" then -- Non-Spacing Mark
--- elseif d == "bn" then -- Boundary Neutral
--- elseif d == "b" then -- Paragraph Separator
--- elseif d == "s" then -- Segment Separator
--- elseif d == "ws" then -- Whitespace
--- elseif d == "on" then -- Other Neutrals
-
-typesetters.directions = typesetters.directions or { }
-local directions = typesetters.directions
-
-local a_state = attributes.private('state')
-local a_directions = attributes.private('directions')
-
-local skipmath = true
-local strip = false
-
--- todo: delayed inserts here
--- todo: get rid of local functions here
-
--- beware, math adds whatsits afterwards so that will mess things up
-
-local finish, autodir, embedded, override, done = nil, 0, 0, 0, false
-local list, glyphs = nil, false
-local finished, finidir, finipos = nil, nil, 1
-local head, current, inserted = nil, nil, nil
-
-local function finish_auto_before()
- head, inserted = insert_node_before(head,current,new_textdir("-"..finish))
- finished, finidir = inserted, finish
- if trace_directions then
- insert(list,#list,format("auto finish inserted before: %s",finish))
- finipos = #list-1
- end
- finish, autodir, done = nil, 0, true
-end
+local chardirections = characters.directions
+local charmirrors = characters.mirrors
+local charclasses = characters.textclasses
+
+local directions = typesetters.directions or { }
+typesetters.directions = directions
+
+local a_state = attributes.private('state')
+local a_directions = attributes.private('directions')
+local a_mathbidi = attributes.private('mathbidi')
+
+local strip = false
-local function finish_auto_after()
- head, current = insert_node_after(head,current,new_textdir("-"..finish))
- finished, finidir = current, finish
- if trace_directions then
- list[#list+1] = format("auto finish inserted after: %s",finish)
- finipos = #list
+local s_isol = fonts.analyzers.states.isol
+
+local variables = interfaces.variables
+local v_global = variables["global"]
+local v_local = variables["local"]
+local v_on = variables.on
+local v_yes = variables.yes
+
+local m_enabled = 2^6 -- 64
+local m_global = 2^7
+local m_fences = 2^8
+
+local handlers = { }
+local methods = { }
+local lastmethod = 0
+
+local function installhandler(name,handler)
+ local method = methods[name]
+ if not method then
+ lastmethod = lastmethod + 1
+ method = lastmethod
+ methods[name] = method
end
- finish, autodir, done = nil, 0, true
+ handlers[method] = handler
+ return method
end
-local function force_auto_left_before()
- if finish then
- finish_auto_before()
- end
- if embedded >= 0 then
- finish, autodir, done = "TLT", 1, true
+directions.handlers = handlers
+directions.installhandler = installhandler
+
+local function tomode(specification)
+ local scope = specification.scope
+ local mode
+ if scope == v_global or scope == v_on then
+ mode = m_enabled + m_global
+ elseif scope == v_local then
+ mode = m_enabled
else
- finish, autodir, done = "TRT", -1, true
+ return 0
end
- if finidir == finish then
- head = remove_node(head,finished,true)
- if trace_directions then
- list[finipos] = list[finipos] .. " (deleted afterwards)"
- insert(list,#list,format("start text dir %s (embedded: %s)",finish,embedded))
- end
+ local method = methods[specification.method]
+ if method then
+ mode = mode + method
else
- head, inserted = insert_node_before(head,current,new_textdir("+"..finish))
- if trace_directions then
- insert(list,#list,format("start text dir %s (embedded: %s)",finish,embedded))
- end
+ return 0
end
+ if specification.fences == v_yes then
+ mode = mode + m_fences
+ end
+ return mode
end
-local function force_auto_right_before()
- if finish then
- finish_auto_before()
- end
- if embedded <= 0 then
- finish, autodir, done = "TRT", -1, true
- else
- finish, autodir, done = "TLT", 1, true
- end
- if finidir == finish then
- head = remove_node(head,finished,true)
- if trace_directions then
- list[finipos] = list[finipos] .. " (deleted afterwards)"
- insert(list,#list,format("start text dir %s (embedded: %s)",finish,embedded))
- end
+local function getglobal(a)
+ return a and a > 0 and hasbit(a,m_global)
+end
+
+local function getfences(a)
+ return a and a > 0 and hasbit(a,m_fences)
+end
+
+local function getmethod(a)
+ return a and a > 0 and a % m_enabled or 0
+end
+
+directions.tomode = tomode
+directions.getglobal = getglobal
+directions.getfences = getfences
+directions.getmethod = getmethod
+directions.installhandler = installhandler
+
+-- beware: in dha we have character properties and in dua|b we have direction properties
+
+function directions.setcolor(current,direction,reversed,mirror)
+ if mirror then
+ setcolor(current,"bidi:mirrored")
+ elseif direction == "l" then
+ setcolor(current,reversed and "bidi:left:reversed" or "bidi:left:original")
+ elseif direction == "r" then
+ setcolor(current,reversed and "bidi:right:reversed" or "bidi:right:original")
else
- head, inserted = insert_node_before(head,current,new_textdir("+"..finish))
- if trace_directions then
- insert(list,#list,format("start text dir %s (embedded: %s)",finish,embedded))
- end
+ resetcolor(current)
end
end
--- todo: use new dir functions
+function commands.getbidimode(specification)
+ context(tomode(specification)) -- hash at tex end
+end
-local s_isol = fonts.analyzers.states.isol
+local enabled = false
-function directions.process(namespace,attribute,start) -- todo: make faster
- if not start.next then
- return start, false
- end
- head, current, inserted = start, start, nil
- finish, autodir, embedded, override, done = nil, 0, 0, 0, false
- list, glyphs = trace_directions and { }, false
- finished, finidir, finipos = nil, nil, 1
- local stack, top, obsolete = { }, 0, { }
- local lro, rlo, prevattr, inmath = false, false, 0, false
- while current do
- local id = current.id
- if skipmath and id == math_code then
- local subtype = current.subtype
- if subtype == beginmath_code then
- inmath = true
- elseif subtype == endmath_code then
- inmath = false
- else
- -- todo
- end
- current = current.next
- elseif inmath then
- current = current.next
- else
- local attr = current[attribute]
- if attr and attr > 0 then
- -- current[attribute] = unsetvalue -- slow, needed?
- if attr == 1 then
- -- bidi parsing mode
- elseif attr ~= prevattr then
- -- no pop, grouped driven (2=normal,3=lro,4=rlo)
- if attr == 3 then
- if trace_directions then
- list[#list+1] = format("override right -> left (lro) (bidi=%s)",attr)
- end
- lro, rlo = true, false
- elseif attr == 4 then
- if trace_directions then
- list[#list+1] = format("override left -> right (rlo) (bidi=%s)",attr)
- end
- lro, rlo = false, true
- else
- if trace_directions and
- current ~= head then list[#list+1] = format("override reset (bidi=%s)",attr)
- end
- lro, rlo = false, false
- end
- prevattr = attr
- end
- end
- if id == glyph_code then
- glyphs = true
- if attr and attr > 0 then
- local char = current.char
- local d = chardirs[char]
- if rlo or override > 0 then
- if d == "l" then
- if trace_directions then
- list[#list+1] = format("char %s (%s / U+%04X) of class %s overidden to r (bidi=%s)",utfchar(char),char,char,d,attr)
- end
- d = "r"
- elseif trace_directions then
- if d == "lro" or d == "rlo" or d == "pdf" then -- else side effects on terminal
- list[#list+1] = format("override char of class %s (bidi=%s)",d,attr)
- else -- todo: rle lre
- list[#list+1] = format("char %s (%s / U+%04X) of class %s (bidi=%s)",utfchar(char),char,char,d,attr)
- end
- end
- elseif lro or override < 0 then
- if d == "r" or d == "al" then
- current[a_state] = s_isol -- maybe better have a special bidi attr value -> override (9) -> todo
- if trace_directions then
- list[#list+1] = format("char %s (%s / U+%04X) of class %s overidden to l (bidi=%s) (state=isol)",utfchar(char),char,char,d,attr)
- end
- d = "l"
- elseif trace_directions then
- if d == "lro" or d == "rlo" or d == "pdf" then -- else side effects on terminal
- list[#list+1] = format("override char of class %s (bidi=%s)",d,attr)
- else -- todo: rle lre
- list[#list+1] = format("char %s (%s / U+%04X) of class %s (bidi=%s)",utfchar(char),char,char,d,attr)
- end
- end
- elseif trace_directions then
- if d == "lro" or d == "rlo" or d == "pdf" then -- else side effects on terminal
- list[#list+1] = format("override char of class %s (bidi=%s)",d,attr)
- else -- todo: rle lre
- list[#list+1] = format("char %s (%s / U+%04X) of class %s (bidi=%s)",utfchar(char),char,char,d,attr)
- end
- end
- if d == "on" then
- local mirror = chardata[char].mirror -- maybe make a special mirror table
- if mirror and fontchar[current.font][mirror] then
- -- todo: set attribute
- if autodir < 0 then
- current.char = mirror
- done = true
- --~ elseif left or autodir > 0 then
- --~ if not is_right(current.prev) then
- --~ current.char = mirror
- --~ done = true
- --~ end
- end
- end
- elseif d == "l" or d == "en" then -- european number
- if autodir <= 0 then -- could be option
- force_auto_left_before()
- end
- elseif d == "r" or d == "al" then -- arabic number
- if autodir >= 0 then
- force_auto_right_before()
- end
- elseif d == "an" then -- arabic number
- -- actually this is language dependent ...
--- if autodir <= 0 then
--- force_auto_left_before()
--- end
- if autodir >= 0 then
- force_auto_right_before()
- end
- elseif d == "lro" then -- Left-to-Right Override -> right becomes left
- if trace_directions then
- list[#list+1] = "override right -> left"
- end
- top = top + 1
- stack[top] = { override, embedded }
- override = -1
- obsolete[#obsolete+1] = current
- elseif d == "rlo" then -- Right-to-Left Override -> left becomes right
- if trace_directions then
- list[#list+1] = "override left -> right"
- end
- top = top + 1
- stack[top] = { override, embedded }
- override = 1
- obsolete[#obsolete+1] = current
- elseif d == "lre" then -- Left-to-Right Embedding -> TLT
- if trace_directions then
- list[#list+1] = "embedding left -> right"
- end
- top = top + 1
- stack[top] = { override, embedded }
- embedded = 1
- obsolete[#obsolete+1] = current
- elseif d == "rle" then -- Right-to-Left Embedding -> TRT
- if trace_directions then
- list[#list+1] = "embedding right -> left"
- end
- top = top + 1
- stack[top] = { override, embedded }
- embedded = -1 -- was 1
- obsolete[#obsolete+1] = current
- elseif d == "pdf" then -- Pop Directional Format
- -- override = 0
- if top > 0 then
- local s = stack[top]
- override, embedded = s[1], s[2]
- top = top - 1
- if trace_directions then
- list[#list+1] = format("state: override: %s, embedded: %s, autodir: %s",override,embedded,autodir)
- end
- else
- if trace_directions then
- list[#list+1] = "pop (error, too many pops)"
- end
- end
- obsolete[#obsolete+1] = current
- end
- elseif trace_directions then
- local char = current.char
- local d = chardirs[char]
- list[#list+1] = format("char %s (%s / U+%04X) of class %s (no bidi)",utfchar(char),char,char,d or "?")
- end
- elseif id == whatsit_code then
- if finish then
- finish_auto_before()
- end
- local subtype = current.subtype
- if subtype == localpar_code then
- local dir = current.dir
- local d = sub(dir,2,2)
- if d == 'R' then -- find(dir,".R.") / dir == "TRT"
- autodir = -1
- else
- autodir = 1
- end
- -- embedded = autodir
- if trace_directions then
- list[#list+1] = format("pardir %s",dir)
- end
- elseif subtype == dir_code then
- local dir = current.dir
- -- local sign = sub(dir,1,1)
- -- local dire = sub(dir,3,3)
- local sign, dire = match(dir,"^(.).(.)")
- if dire == "R" then
- if sign == "+" then
- finish, autodir = "TRT", -1
- else
- finish, autodir = nil, 0
- end
- else
- if sign == "+" then
- finish, autodir = "TLT", 1
- else
- finish, autodir = nil, 0
- end
- end
- if trace_directions then
- list[#list+1] = format("textdir %s",dir)
- end
- end
- else
- if trace_directions then
- list[#list+1] = format("node %s (subtype %s)",nodecodes[id],current.subtype)
- end
- if finish then
- finish_auto_before()
- end
- end
- local cn = current.next
- if not cn then
- if finish then
- finish_auto_after()
- end
- end
- current = cn
- end
+local starttiming = statistics.starttiming
+local stoptiming = statistics.stoptiming
+
+function directions.handler(head) -- ,_,_,_,direction) -- nodes not nuts | 5th arg is direction
+ if not head.next then
+ return head, false
end
- if trace_directions and glyphs then
- report_directions("start log")
- for i=1,#list do
- report_directions("%02i: %s",i,list[i])
- end
- report_directions("stop log")
+ local attr = head[a_directions]
+ if not attr or attr == 0 then
+ return head, false
end
- if done and strip then
- local n = #obsolete
- if n > 0 then
- for i=1,n do
- remove_node(head,obsolete[i],true)
- end
- report_directions("%s character nodes removed",n)
- end
+ local method = getmethod(attr)
+ local handler = handlers[method]
+ if not handler then
+ return head, false
end
+ starttiming(directions)
+ local head, done = handler(head)
+ stoptiming(directions)
return head, done
end
---~ local function is_right(n) -- keep !
---~ if n then
---~ local id = n.id
---~ if id == glyph_code then
---~ local attr = n[attribute]
---~ if attr and attr > 0 then
---~ local d = chardirs[n.char]
---~ if d == "r" or d == "al" then -- override
---~ return true
---~ end
---~ end
---~ end
---~ end
---~ return false
---~ end
-
---~ function directions.enable()
---~ tasks.enableaction("processors","directions.handler")
---~ end
+statistics.register("text directions", function()
+ if enabled then
+ return statistics.elapsedseconds(directions)
+ end
+end)
-local enabled = false
+-- function directions.enable()
+-- tasks.enableaction("processors","directions.handler")
+-- end
function directions.set(n) -- todo: names and numbers
if not enabled then
- if trace_directions then
- report_breakpoints("enabling directions handler")
+ if trace_textdirections then
+ report_textdirections("enabling directions handler")
end
tasks.enableaction("processors","typesetters.directions.handler")
enabled = true
@@ -451,13 +231,7 @@ function directions.set(n) -- todo: names and numbers
n = unsetvalue
-- maybe tracing
end
- texattribute[a_directions] = n
+ texsetattribute(a_directions,n)
end
commands.setdirection = directions.set
-
-directions.handler = nodes.installattributehandler {
- name = "directions",
- namespace = directions,
- processor = directions.process,
-}