if not modules then modules = { } end modules ['typo-dua'] = { version = 1.001, comment = "companion to typo-dir.mkiv", author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", copyright = "PRAGMA ADE / ConTeXt Development Team / See below", license = "see context related readme files / whatever applies", comment = "Unicode bidi (sort of) variant a", derived = "derived from t-bidi by Khaled Hosny who derived from minibidi.c by Arabeyes", } -- Comment by Khaled Hosny: -- -- This code started as a line for line translation of Arabeyes' minibidi.c from C to Lua, -- excluding parts that of no use to us like shaping. The C code is Copyright (c) 2004 -- Ahmad Khalifa, and is distributed under the MIT Licence. The full license text can be -- found at: http://svn.arabeyes.org/viewvc/projects/adawat/minibidi/LICENCE. -- -- Comment by Hans Hagen: -- -- The initial conversion to Lua has been done by Khaled Hosny. As a first step I optimized the -- code (to suit todays context mkiv). Next I fixed the foreign object handling, for instance, -- we can skip over math but we need to inject before the open math node and after the close node, -- so we need to keep track of the endpoint. After I fixed that bit I realized that it was possible -- to generalize the object skipper if only because it saves memory (and processing time). The -- current implementation is about three times as fast (roughly measured) and I can probably squeeze -- out some more, only to sacrifice soem when I start adding features. A next stage will be to have -- more granularity in foreign objects. Of course all errors are mine. I'll also added the usual bit -- of context tracing and reshuffled some code. A memory optimization is on the agenda (already sort -- of prepared). It is no longer line by line. -- -- The first implementation of bidi in context started out from examples of mixed usage (including -- more than text) with an at that point bugged r2l support. It has some alternatives for letting -- the tex markup having a bit higher priority. I will probably add some local (style driven) -- overrides to the following code as well. It also means that we can selectively enable and disable -- the parser (because a document wide appliance migh tnot be what we want). This will bring a -- slow down but not that much. (I need to check with Idris why we have things like isol there.) -- -- We'll probably keep multiple methods around (this is just a side track of improving the already -- available scanner). I need to look into the changed unicode recomendations anyway as a first -- impression is that some fuzzyness has been removed. I finally need to spend time on those specs. So, -- there will be a third variant (written from scratch) so some point. The fun about TeX is that we -- can provide alternative solutions (given that it doesn't bloat the engine!) -- -- A test with some hebrew, mixed with hboxes with latin/hebrew and simple math. In fact this triggered -- playing with bidi again: -- -- 0.11 : nothing -- 0.14 : 0.03 node list only, one pass -- 0.23 : 0.12 close to unicode bidi, multipass -- 0.44 : 0.33 original previous -- -- todo: check for introduced errors -- todo: reuse list, we have size, so we can just change values (and auto allocate when not there) -- todo: reuse the stack -- todo: no need for a max check -- todo: collapse bound similar ranges (not ok yet) -- tood: combine some sweeps -- -- This one wil get frozen (or if needed in sync with basic t-bidi) and I will explore more options -- in typo-dub.lua. There I might also be able to improve performance a bit. Derived and improved -- versions will also be sped up local insert, remove, unpack, concat = table.insert, table.remove, table.unpack, table.concat local utfchar = utf.char local formatters = string.formatters local directiondata = characters.directions local mirrordata = characters.mirrors local nuts = nodes.nuts local getnext = nuts.getnext local getid = nuts.getid local getsubtype = nuts.getsubtype local getlist = nuts.getlist local getchar = nuts.getchar local getprop = nuts.getprop local getdirection = nuts.getdirection local setprop = nuts.setprop local setchar = nuts.setchar local setdirection = nuts.setdirection ----- setattrlist = nuts.setattrlist local remove_node = nuts.remove local insert_node_after = nuts.insert_after local insert_node_before = nuts.insert_before local nodepool = nuts.pool local new_direction = nodepool.direction local nodecodes = nodes.nodecodes local gluecodes = nodes.gluecodes local glyph_code = nodecodes.glyph local glue_code = nodecodes.glue local hlist_code = nodecodes.hlist local vlist_code = nodecodes.vlist local math_code = nodecodes.math local dir_code = nodecodes.dir local localpar_code = nodecodes.localpar local parfillskip_code = gluecodes.parfillskip local dirvalues = nodes.dirvalues local lefttoright_code = dirvalues.lefttoright local righttoleft_code = dirvalues.righttoleft local maximum_stack = 60 local directions = typesetters.directions local setcolor = directions.setcolor local remove_controls = true directives.register("typesetters.directions.one.removecontrols",function(v) remove_controls = v end) local report_directions = logs.reporter("typesetting","directions one") local trace_directions = false trackers .register("typesetters.directions", function(v) trace_directions = v end) local trace_details = false trackers .register("typesetters.directions.details", function(v) trace_details = v end) local whitespace = { lre = true, rle = true, lro = true, rlo = true, pdf = true, bn = true, ws = true, } local b_s_ws_on = { b = true, s = true, ws = true, on = true } -- tracing local function show_list(list,size,what) local what = what or "direction" local joiner = utfchar(0x200C) local result = { } for i=1,size do local entry = list[i] local character = entry.char local direction = entry[what] if character == 0xFFFC then local first = entry.id local last = entry.last local skip = entry.skip if last then result[i] = formatters["%-3s:%s %s..%s (%i)"](direction,joiner,nodecodes[first],nodecodes[last],skip or 0) else result[i] = formatters["%-3s:%s %s (%i)"](direction,joiner,nodecodes[first],skip or 0) end elseif character >= 0x202A and character <= 0x202C then result[i] = formatters["%-3s:%s %U"](direction,joiner,character) else result[i] = formatters["%-3s:%s %c %U"](direction,joiner,character,character) end end return concat(result,joiner .. " | " .. joiner) end -- preparation local function show_done(list,size) local joiner = utfchar(0x200C) local result = { } for i=1,size do local entry = list[i] local character = entry.char local begindir = entry.begindir local enddir = entry.enddir if begindir then result[#result+1] = formatters["<%s>"](begindir) end if entry.remove then -- continue elseif character == 0xFFFC then result[#result+1] = formatters["<%s>"]("?") elseif character == 0x0020 then result[#result+1] = formatters["<%s>"](" ") elseif character >= 0x202A and character <= 0x202C then result[#result+1] = formatters["<%s>"](entry.original) else result[#result+1] = utfchar(character) end if enddir then result[#result+1] = formatters["<%s>"](enddir) end end return concat(result,joiner) end -- keeping the list and overwriting doesn't save much runtime, only a few percent -- char is only used for mirror, so in fact we can as well only store it for -- glyphs only local function build_list(head) -- todo: store node pointer ... saves loop -- P1 local current = head local list = { } local size = 0 while current do size = size + 1 local id = getid(current) if getprop(current,"directions") then local skip = 0 local last = id current = getnext(current) while current do local id = getid(current) if getprop(current,"directions") then skip = skip + 1 last = id current = getnext(current) else break end end if id == last then -- the start id list[size] = { char = 0xFFFC, direction = "on", original = "on", level = 0, skip = skip, id = id } else list[size] = { char = 0xFFFC, direction = "on", original = "on", level = 0, skip = skip, id = id, last = last } end elseif id == glyph_code then local chr = getchar(current) local dir = directiondata[chr] list[size] = { char = chr, direction = dir, original = dir, level = 0 } current = getnext(current) elseif id == glue_code then -- and how about kern list[size] = { char = 0x0020, direction = "ws", original = "ws", level = 0 } current = getnext(current) elseif id == dir_code then local direction, pop = getdirection(current) if direction == lefttoright_code then if pop then list[size] = { char = 0x202C, direction = "pdf", original = "pdf", level = 0 } else list[size] = { char = 0x202A, direction = "lre", original = "lre", level = 0 } end elseif direction == righttoleft_code then if pop then list[size] = { char = 0x202C, direction = "pdf", original = "pdf", level = 0 } else list[size] = { char = 0x202B, direction = "rle", original = "rle", level = 0 } end else list[size] = { char = 0xFFFC, direction = "on", original = "on", level = 0, id = id } -- object replacement character end current = getnext(current) elseif id == math_code then local skip = 0 current = getnext(current) while getid(current) ~= math_code do skip = skip + 1 current = getnext(current) end skip = skip + 1 current = getnext(current) list[size] = { char = 0xFFFC, direction = "on", original = "on", level = 0, skip = skip, id = id } else local skip = 0 local last = id current = getnext(current) while n do local id = getid(current) if id ~= glyph_code and id ~= glue_code and id ~= dir_code then skip = skip + 1 last = id current = getnext(current) else break end end if id == last then -- the start id list[size] = { char = 0xFFFC, direction = "on", original = "on", level = 0, skip = skip, id = id } else list[size] = { char = 0xFFFC, direction = "on", original = "on", level = 0, skip = skip, id = id, last = last } end end end return list, size end -- the action -- local function find_run_limit_et(list,run_start,limit) -- local run_limit = run_start -- local i = run_start -- while i <= limit and list[i].direction == "et" do -- run_limit = i -- i = i + 1 -- end -- return run_limit -- end local function find_run_limit_et(list,start,limit) -- returns last match for i=start,limit do if list[i].direction == "et" then start = i else return start end end return start end -- local function find_run_limit_b_s_ws_on(list,run_start,limit) -- local run_limit = run_start -- local i = run_start -- while i <= limit and b_s_ws_on[list[i].direction] do -- run_limit = i -- i = i + 1 -- end -- return run_limit -- end local function find_run_limit_b_s_ws_on(list,start,limit) for i=start,limit do if b_s_ws_on[list[i].direction] then start = i else return start end end return start end local function get_baselevel(head,list,size,direction) -- This is an adapted version: if direction == lefttoright_code or direction == righttoleft_code then return direction, true elseif getid(head) == localpar_code and getsubtype(head) == 0 then direction = getdirection(head) if direction == lefttoright_code or direction == righttoleft_code then return direction, true end end -- for old times sake we we handle strings too if direction == "TLT" then return lefttoright_code, true elseif direction == "TRT" then return righttoleft_code, true end -- P2, P3 for i=1,size do local entry = list[i] local direction = entry.direction if direction == "r" or direction == "al" then return righttoleft_code, true elseif direction == "l" then return lefttoright_code, true end end return lefttoright_code, false end local function resolve_explicit(list,size,baselevel) -- X1 local level = baselevel local override = "on" local stack = { } local nofstack = 0 for i=1,size do local entry = list[i] local direction = entry.direction -- X2 if direction == "rle" then if nofstack < maximum_stack then nofstack = nofstack + 1 stack[nofstack] = { level, override } level = level + (level % 2 == 1 and 2 or 1) -- least_greater_odd(level) override = "on" entry.level = level entry.direction = "bn" entry.remove = true elseif trace_directions then report_directions("stack overflow at position %a with direction %a",i,direction) end -- X3 elseif direction == "lre" then if nofstack < maximum_stack then nofstack = nofstack + 1 stack[nofstack] = { level, override } level = level + (level % 2 == 1 and 1 or 2) -- least_greater_even(level) override = "on" entry.level = level entry.direction = "bn" entry.remove = true elseif trace_directions then report_directions("stack overflow at position %a with direction %a",i,direction) end -- X4 elseif direction == "rlo" then if nofstack < maximum_stack then nofstack = nofstack + 1 stack[nofstack] = { level, override } level = level + (level % 2 == 1 and 2 or 1) -- least_greater_odd(level) override = "r" entry.level = level entry.direction = "bn" entry.remove = true elseif trace_directions then report_directions("stack overflow at position %a with direction %a",i,direction) end -- X5 elseif direction == "lro" then if nofstack < maximum_stack then nofstack = nofstack + 1 stack[nofstack] = { level, override } level = level + (level % 2 == 1 and 1 or 2) -- least_greater_even(level) override = "l" entry.level = level entry.direction = "bn" entry.remove = true elseif trace_directions then report_directions("stack overflow at position %a with direction %a",i,direction) end -- X7 elseif direction == "pdf" then if noifstack > 0 then local stacktop = stack[nofstack] nofstack = nofstack - 1 level = stacktop[1] override = stacktop[2] entry.level = level entry.direction = "bn" entry.remove = true elseif trace_directions then report_directions("stack underflow at position %a with direction %a",i,direction) end -- X6 else entry.level = level if override ~= "on" then entry.direction = override end end end -- X8 (reset states and overrides after paragraph) end local function resolve_weak(list,size,start,limit,sor,eor) -- W1 for i=start,limit do local entry = list[i] if entry.direction == "nsm" then if i == start then entry.direction = sor else entry.direction = list[i-1].direction end end end -- W2 for i=start,limit do local entry = list[i] if entry.direction == "en" then for j=i-1,start,-1 do local prev = list[j] local direction = prev.direction if direction == "al" then entry.direction = "an" break elseif direction == "r" or direction == "l" then break end end end end -- W3 for i=start,limit do local entry = list[i] if entry.direction == "al" then entry.direction = "r" end end -- W4 for i=start+1,limit-1 do local entry = list[i] local direction = entry.direction if direction == "es" then if list[i-1].direction == "en" and list[i+1].direction == "en" then entry.direction = "en" end elseif direction == "cs" then local prevdirection = list[i-1].direction if prevdirection == "en" then if list[i+1].direction == "en" then entry.direction = "en" end elseif prevdirection == "an" and list[i+1].direction == "an" then entry.direction = "an" end end end -- W5 local i = start while i <= limit do if list[i].direction == "et" then local runstart = i local runlimit = find_run_limit_et(list,runstart,limit) -- when moved inline we can probably collapse a lot local rundirection = runstart == start and sor or list[runstart-1].direction if rundirection ~= "en" then rundirection = runlimit == limit and eor or list[runlimit+1].direction end if rundirection == "en" then for j=runstart,runlimit do list[j].direction = "en" end end i = runlimit end i = i + 1 end -- W6 for i=start,limit do local entry = list[i] local direction = entry.direction if direction == "es" or direction == "et" or direction == "cs" then entry.direction = "on" end end -- W7 for i=start,limit do local entry = list[i] if entry.direction == "en" then local prev_strong = sor for j=i-1,start,-1 do local direction = list[j].direction if direction == "l" or direction == "r" then prev_strong = direction break end end if prev_strong == "l" then entry.direction = "l" end end end end local function resolve_neutral(list,size,start,limit,sor,eor) -- N1, N2 for i=start,limit do local entry = list[i] if b_s_ws_on[entry.direction] then local leading_direction, trailing_direction, resolved_direction local runstart = i local runlimit = find_run_limit_b_s_ws_on(list,runstart,limit) if runstart == start then leading_direction = sor else leading_direction = list[runstart-1].direction if leading_direction == "en" or leading_direction == "an" then leading_direction = "r" end end if runlimit == limit then trailing_direction = eor else trailing_direction = list[runlimit+1].direction if trailing_direction == "en" or trailing_direction == "an" then trailing_direction = "r" end end if leading_direction == trailing_direction then -- N1 resolved_direction = leading_direction else -- N2 / does the weird period resolved_direction = entry.level % 2 == 1 and "r" or "l" -- direction_of_level(entry.level) end for j=runstart,runlimit do list[j].direction = resolved_direction end i = runlimit end i = i + 1 end end -- local function resolve_implicit(list,size,start,limit,sor,eor) -- -- I1 -- for i=start,limit do -- local entry = list[i] -- local level = entry.level -- if level % 2 ~= 1 then -- not odd(level) -- local direction = entry.direction -- if direction == "r" then -- entry.level = level + 1 -- elseif direction == "an" or direction == "en" then -- entry.level = level + 2 -- end -- end -- end -- -- I2 -- for i=start,limit do -- local entry = list[i] -- local level = entry.level -- if level % 2 == 1 then -- odd(level) -- local direction = entry.direction -- if direction == "l" or direction == "en" or direction == "an" then -- entry.level = level + 1 -- end -- end -- end -- end local function resolve_implicit(list,size,start,limit,sor,eor) for i=start,limit do local entry = list[i] local level = entry.level local direction = entry.direction if level % 2 ~= 1 then -- even -- I1 if direction == "r" then entry.level = level + 1 elseif direction == "an" or direction == "en" then entry.level = level + 2 end else -- I2 if direction == "l" or direction == "en" or direction == "an" then entry.level = level + 1 end end end end local function resolve_levels(list,size,baselevel) -- X10 local start = 1 while start < size do local level = list[start].level local limit = start + 1 while limit < size and list[limit].level == level do limit = limit + 1 end local prev_level = start == 1 and baselevel or list[start-1].level local next_level = limit == size and baselevel or list[limit+1].level local sor = (level > prev_level and level or prev_level) % 2 == 1 and "r" or "l" -- direction_of_level(max(level,prev_level)) local eor = (level > next_level and level or next_level) % 2 == 1 and "r" or "l" -- direction_of_level(max(level,next_level)) -- W1 .. W7 resolve_weak(list,size,start,limit,sor,eor) -- N1 .. N2 resolve_neutral(list,size,start,limit,sor,eor) -- I1 .. I2 resolve_implicit(list,size,start,limit,sor,eor) start = limit end -- L1 for i=1,size do local entry = list[i] local direction = entry.original -- (1) if direction == "s" or direction == "b" then entry.level = baselevel -- (2) for j=i-1,1,-1 do local entry = list[j] if whitespace[entry.original] then entry.level = baselevel else break end end end end -- (3) for i=size,1,-1 do local entry = list[i] if whitespace[entry.original] then entry.level = baselevel else break end end -- L4 for i=1,size do local entry = list[i] if entry.level % 2 == 1 then -- odd(entry.level) local mirror = mirrordata[entry.char] if mirror then entry.mirror = mirror end end end end local function insert_dir_points(list,size) -- L2, but no actual reversion is done, we simply annotate where -- begindir/endddir node will be inserted. local maxlevel = 0 local finaldir = false for i=1,size do local level = list[i].level if level > maxlevel then maxlevel = level end end for level=0,maxlevel do local started = false local begindir = nil local enddir = nil if level % 2 == 1 then begindir = righttoleft_code enddir = righttoleft_code else begindir = lefttoright_code enddir = lefttoright_code end for i=1,size do local entry = list[i] if entry.level >= level then if not started then entry.begindir = begindir started = true end else if started then list[i-1].enddir = enddir started = false end end end -- make sure to close the run at end of line if started then finaldir = enddir end end if finaldir then list[size].enddir = finaldir end end local function apply_to_list(list,size,head,pardir) local index = 1 local current = head while current do if index > size then report_directions("fatal error, size mismatch") break end local id = getid(current) local entry = list[index] local begindir = entry.begindir local enddir = entry.enddir setprop(current,"directions",true) if id == glyph_code then local mirror = entry.mirror if mirror then setchar(current,mirror) end if trace_directions then local direction = entry.direction setcolor(current,direction,false,mirror) end elseif id == hlist_code or id == vlist_code then setdirection(current,pardir) -- is this really needed? elseif id == glue_code then if enddir and getsubtype(current) == parfillskip_code then -- insert the last enddir before \parfillskip glue local d = new_direction(enddir,true) -- setprop(d,"directions",true) -- setattrlist(d,current) head = insert_node_before(head,current,d) enddir = false end elseif begindir then if id == localpar_code and getsubtype(current) == 0 then -- localpar should always be the 1st node local d = new_direction(begindir) -- setprop(d,"directions",true) -- setattrlist(d,current) head, current = insert_node_after(head,current,d) begindir = nil end end if begindir then local d = new_direction(begindir) -- setprop(d,"directions",true) -- setattrlist(d,current) head = insert_node_before(head,current,d) end local skip = entry.skip if skip and skip > 0 then for i=1,skip do current = getnext(current) setprop(current,"directions",true) end end if enddir then local d = new_direction(enddir,true) -- setprop(d,"directions",true) -- setattrlist(d,current) head, current = insert_node_after(head,current,d) end if not entry.remove then current = getnext(current) elseif remove_controls then -- X9 head, current = remove_node(head,current,true) else current = getnext(current) end index = index + 1 end return head end local function process(head,direction,only_one,where) -- This is an adapted version: local list, size = build_list(head) local baselevel, dirfound = get_baselevel(head,list,size,direction) if not dirfound and trace_details then report_directions("no initial direction found, gambling") end if trace_details then report_directions("before : %s",show_list(list,size,"original")) end resolve_explicit(list,size,baselevel) resolve_levels(list,size,baselevel) insert_dir_points(list,size) if trace_details then report_directions("after : %s",show_list(list,size,"direction")) report_directions("result : %s",show_done(list,size)) end return apply_to_list(list,size,head,baselevel) end directions.installhandler(interfaces.variables.one,process)