diff options
author | Marius <mariausol@gmail.com> | 2013-09-09 21:00:31 +0300 |
---|---|---|
committer | Marius <mariausol@gmail.com> | 2013-09-09 21:00:31 +0300 |
commit | 2ea8d2cec1e6a94c6441926d2965fd36ad4365e3 (patch) | |
tree | 3b8e977613f9cf8c254378634716faf0a6fe1bdd /tex/context/base/typo-dua.lua | |
parent | 957fcf6d8143398d66ff1ddb4c9ff7eeb9bb5bb0 (diff) | |
download | context-2ea8d2cec1e6a94c6441926d2965fd36ad4365e3.tar.gz |
beta 2013.09.09 19:45
Diffstat (limited to 'tex/context/base/typo-dua.lua')
-rw-r--r-- | tex/context/base/typo-dua.lua | 785 |
1 files changed, 785 insertions, 0 deletions
diff --git a/tex/context/base/typo-dua.lua b/tex/context/base/typo-dua.lua new file mode 100644 index 000000000..8eb0a7859 --- /dev/null +++ b/tex/context/base/typo-dua.lua @@ -0,0 +1,785 @@ +if not modules then modules = { } end modules ['typo-dua'] = { + version = 1.001, + comment = "companion to typo-dir.mkiv", + author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", + copyright = "PRAGMA ADE / ConTeXt Development Team / See below", + license = "see context related readme files / whatever applies", + comment = "Unicode bidi (sort of) variant a", + derived = "derived from t-bidi by Khaled Hosny who derived from minibidi.c by Arabeyes", +} + +-- Comment by Khaled Hosny: +-- +-- This code started as a line for line translation of Arabeyes' minibidi.c from C to Lua, +-- excluding parts that are of no use to us like shaping. The C code is Copyright (c) 2004 +-- Ahmad Khalifa, and is distributed under the MIT Licence. The full license text can be +-- found at: http://svn.arabeyes.org/viewvc/projects/adawat/minibidi/LICENCE. +-- +-- Comment by Hans Hagen: +-- +-- The initial conversion to Lua has been done by Khaled Hosny. As a first step I optimized the +-- code (to suit today's context mkiv). Next I fixed the foreign object handling, for instance, +-- we can skip over math but we need to inject before the open math node and after the close node, +-- so we need to keep track of the endpoint. After I fixed that bit I realized that it was possible +-- to generalize the object skipper if only because it saves memory (and processing time). The +-- current implementation is about three times as fast (roughly measured) and I can probably squeeze +-- out some more, only to sacrifice some when I start adding features. A next stage will be to have +-- more granularity in foreign objects. Of course all errors are mine. I also added the usual bit +-- of context tracing and reshuffled some code. A memory optimization is on the agenda (already sort +-- of prepared).
It is no longer line by line. +-- +-- The first implementation of bidi in context started out from examples of mixed usage (including +-- more than text) with an at that point bugged r2l support. It has some alternatives for letting +-- the tex markup have a bit higher priority. I will probably add some local (style driven) +-- overrides to the following code as well. It also means that we can selectively enable and disable +-- the parser (because a document wide application might not be what we want). This will bring a +-- slow down but not that much. (I need to check with Idris why we have things like isol there.) +-- +-- We'll probably keep multiple methods around (this is just a side track of improving the already +-- available scanner). I need to look into the changed unicode recommendations anyway as a first +-- impression is that some fuzziness has been removed. I finally need to spend time on those specs. So, +-- there will be a third variant (written from scratch) at some point. The fun about TeX is that we +-- can provide alternative solutions (given that it doesn't bloat the engine!) +-- +-- A test with some hebrew, mixed with hboxes with latin/hebrew and simple math. In fact this triggered +-- playing with bidi again: +-- +-- 0.11 : nothing +-- 0.14 : 0.03 node list only, one pass +-- 0.23 : 0.12 close to unicode bidi, multipass +-- 0.44 : 0.33 original previous +-- +-- todo: check for introduced errors +-- todo: reuse list, we have size, so we can just change values (and auto allocate when not there) +-- todo: reuse the stack +-- todo: no need for a max check +-- todo: collapse bound similar ranges (not ok yet) +-- todo: combine some sweeps +-- +-- This one will get frozen (or if needed in sync with basic t-bidi) and I will explore more options +-- in typo-dub.lua. There I might also be able to improve performance a bit.
local insert, remove, unpack, concat = table.insert, table.remove, table.unpack, table.concat
local utfchar = utf.char
local formatters = string.formatters

local directiondata = characters.directions
local mirrordata = characters.mirrors

local remove_node = nodes.remove
local insert_node_after = nodes.insert_after
local insert_node_before = nodes.insert_before

local nodepool = nodes.pool
local new_textdir = nodepool.textdir

local nodecodes = nodes.nodecodes
local whatsitcodes = nodes.whatsitcodes
local skipcodes = nodes.skipcodes

local glyph_code = nodecodes.glyph
local glue_code = nodecodes.glue
local hlist_code = nodecodes.hlist
local vlist_code = nodecodes.vlist
local math_code = nodecodes.math
local whatsit_code = nodecodes.whatsit
local dir_code = whatsitcodes.dir
local localpar_code = whatsitcodes.localpar
-- This used to read skipcodes.skipcodes, a key that does not exist, so the
-- \parfillskip test in apply_to_list could never succeed.
local parfillskip_code = skipcodes.parfillskip

----- object_replacement = 0xFFFC -- object replacement character
local maximum_stack = 60 -- probably spec but not needed

local setcolor = nodes.tracers.colors.set
local resetcolor = nodes.tracers.colors.reset

local directions = typesetters.directions

local a_directions = attributes.private('directions')

local remove_controls = true directives.register("typesetters.directions.one.removecontrols",function(v) remove_controls = v end)

local trace_directions = false trackers.register("typesetters.directions.one", function(v) trace_directions = v end)
local trace_details = false trackers.register("typesetters.directions.one.details", function(v) trace_details = v end)

local report_directions = logs.reporter("typesetting","directions one")

-- Original direction classes that are reset to the base level by the L1 sweep
-- in resolve_levels.

local whitespace = {
    lre = true,
    rle = true,
    lro = true,
    rlo = true,
    pdf = true,
    bn = true,
    ws = true,
}

-- Direction classes that form a "neutral" run in resolve_neutral.

local b_s_ws_on = {
    b = true,
    s = true,
    ws = true,
    on = true
}

-- tracing

-- Renders the first 'size' entries of 'list' as one string for the log. The
-- field named by 'what' (default "direction") is shown per entry, together
-- with the character or, for object replacement entries (0xFFFC), the node
-- code name(s) and the skip count.

local function show_list(list,size,what)
    what = what or "direction"
    local joiner = utfchar(0x200C)
    local result = { }
    for i=1,size do
        local entry = list[i]
        local character = entry.char
        local direction = entry[what]
        if character == 0xFFFC then
            local first = entry.id
            local last = entry.last
            local skip = entry.skip
            if last then
                result[i] = formatters["%-3s:%s %s..%s (%i)"](direction,joiner,nodecodes[first],nodecodes[last],skip or 0)
            else
                result[i] = formatters["%-3s:%s %s (%i)"](direction,joiner,nodecodes[first],skip or 0)
            end
        elseif character >= 0x202A and character <= 0x202C then
            result[i] = formatters["%-3s:%s %U"](direction,joiner,character)
        else
            result[i] = formatters["%-3s:%s %c %U"](direction,joiner,character,character)
        end
    end
    return concat(result,joiner .. " | " .. joiner)
end

-- preparation

-- Renders the annotated list (begindir/enddir markers around the characters)
-- as one string for the log. Entries flagged 'remove' contribute no character.

local function show_done(list,size)
    local joiner = utfchar(0x200C)
    local result = { }
    for i=1,size do
        local entry = list[i]
        local character = entry.char
        local begindir = entry.begindir
        local enddir = entry.enddir
        if begindir then
            result[#result+1] = formatters["<%s>"](begindir)
        end
        if entry.remove then
            -- continue
        elseif character == 0xFFFC then
            result[#result+1] = formatters["<%s>"]("?")
        elseif character == 0x0020 then
            result[#result+1] = formatters["<%s>"](" ")
        elseif character >= 0x202A and character <= 0x202C then
            result[#result+1] = formatters["<%s>"](entry.original)
        else
            result[#result+1] = utfchar(character)
        end
        if enddir then
            result[#result+1] = formatters["<%s>"](enddir)
        end
    end
    return concat(result,joiner)
end

-- keeping the list and overwriting doesn't save much runtime, only a few percent
-- char is only used for mirror, so in fact we can as well only store it for
-- glyphs only

-- Walks the node list starting at 'head' and returns a flat table of entries
-- plus its size. Each entry has char, direction, original and level (0);
-- foreign objects get char 0xFFFC, the node id and a 'skip' count telling
-- apply_to_list how many extra nodes the entry covers.

local function build_list(head) -- todo: store node pointer ... saves loop
    -- P1
    local current = head
    local list = { }
    local size = 0
    while current do
        size = size + 1
        local id = current.id
        if id == glyph_code then
            local chr = current.char
            local dir = directiondata[chr]
            list[size] = { char = chr, direction = dir, original = dir, level = 0 }
            current = current.next
        elseif id == glue_code then
            list[size] = { char = 0x0020, direction = "ws", original = "ws", level = 0 }
            current = current.next
        elseif id == whatsit_code and current.subtype == dir_code then
            local dir = current.dir
            if dir == "+TLT" then
                list[size] = { char = 0x202A, direction = "lre", original = "lre", level = 0 }
            elseif dir == "+TRT" then
                list[size] = { char = 0x202B, direction = "rle", original = "rle", level = 0 }
            elseif dir == "-TLT" or dir == "-TRT" then
                list[size] = { char = 0x202C, direction = "pdf", original = "pdf", level = 0 }
            else
                list[size] = { char = 0xFFFC, direction = "on", original = "on", level = 0, id = id } -- object replacement character
            end
            current = current.next
        elseif id == math_code then
            -- one entry covers everything up to and including the closing math node
            local skip = 0
            current = current.next
            -- guard against an unterminated math run (used to index a nil node)
            while current and current.id ~= math_code do
                skip = skip + 1
                current = current.next
            end
            if current then
                skip = skip + 1
                current = current.next
            end
            list[size] = { char = 0xFFFC, direction = "on", original = "on", level = 0, skip = skip, id = id }
        else
            -- Any other node becomes an entry of its own. The look-ahead loop that
            -- was meant to merge runs of such nodes was guarded by an undefined
            -- global ("while n do") and therefore never ran; merging is not enabled
            -- here because apply_to_list handles boxes one entry at a time.
            list[size] = { char = 0xFFFC, direction = "on", original = "on", level = 0, skip = 0, id = id }
            current = current.next
        end
    end
    return list, size
end

-- the action

-- local function find_run_limit_et(list,run_start,limit)
--     local run_limit = run_start
--     local i = run_start
--     while i <= limit and list[i].direction == "et" do
--         run_limit = i
--         i = i + 1
--     end
--     return run_limit
-- end

-- Returns the index of the last entry of the "et" run that begins at 'start'
-- (never beyond 'limit').

local function find_run_limit_et(list,start,limit) -- returns last match
    for i=start,limit do
        if list[i].direction == "et" then
            start = i
        else
            return start
        end
    end
    return start
end

-- local function find_run_limit_b_s_ws_on(list,run_start,limit)
--     local run_limit = run_start
--     local i = run_start
--     while i <= limit and b_s_ws_on[list[i].direction] do
--         run_limit = i
--         i = i + 1
--     end
--     return run_limit
-- end

-- Same as find_run_limit_et but for runs of b, s, ws and on entries.

local function find_run_limit_b_s_ws_on(list,start,limit)
    for i=start,limit do
        if b_s_ws_on[list[i].direction] then
            start = i
        else
            return start
        end
    end
    return start
end

-- directions.maindir = "r2l"

-- Determines the base level. Returns level (0 or 1), the matching direction
-- ("TLT" or "TRT") and a boolean that is true when the direction was found in
-- the list (local par node or first strong character) and false when it comes
-- from directions.maindir or from the fallback.

local function get_baselevel(head,list,size) -- todo: skip if first is object (or pass head and test for local_par)
    local maindir = directions.maindir
    if maindir == "r2l" then
        return 1, "TRT", false
    elseif maindir == "l2r" then
        return 0, "TLT", false
    elseif head and head.id == whatsit_code and head.subtype == localpar_code then
        if head.dir == "TRT" then
            return 1, "TRT", true
        else
            return 0, "TLT", true
        end
    else
        -- P2, P3
        for i=1,size do
            local entry = list[i]
            local direction = entry.direction
            if direction == "r" or direction == "al" then
                return 1, "TRT", true
            elseif direction == "l" then
                return 0, "TLT", true
            end
        end
        return 0, "TLT", false
    end
end

-- Assigns embedding levels and applies overrides. Embedding and override codes
-- push the current level and override on a stack, "pdf" pops them again; such
-- entries become "bn" and are flagged for removal.

local function resolve_explicit(list,size,baselevel)
    -- X1
    local level = baselevel
    local override = "on"
    local stack = { }
    local nofstack = 0
    for i=1,size do
        local entry = list[i]
        local direction = entry.direction
        -- X2
        if direction == "rle" then
            if nofstack < maximum_stack then
                nofstack = nofstack + 1
                stack[nofstack] = { level, override }
                level = level + (level % 2 == 1 and 2 or 1) -- least_greater_odd(level)
                override = "on"
                entry.level = level
                entry.direction = "bn"
                entry.remove = true
            elseif trace_directions then
                report_directions("stack overflow at position %a with direction %a",i,direction)
            end
        -- X3
        elseif direction == "lre" then
            if nofstack < maximum_stack then
                nofstack = nofstack + 1
                stack[nofstack] = { level, override }
                level = level + (level % 2 == 1 and 1 or 2) -- least_greater_even(level)
                override = "on"
                entry.level = level
                entry.direction = "bn"
                entry.remove = true
            elseif trace_directions then
                report_directions("stack overflow at position %a with direction %a",i,direction)
            end
        -- X4
        elseif direction == "rlo" then
            if nofstack < maximum_stack then
                nofstack = nofstack + 1
                stack[nofstack] = { level, override }
                level = level + (level % 2 == 1 and 2 or 1) -- least_greater_odd(level)
                override = "r"
                entry.level = level
                entry.direction = "bn"
                entry.remove = true
            elseif trace_directions then
                report_directions("stack overflow at position %a with direction %a",i,direction)
            end
        -- X5
        elseif direction == "lro" then
            if nofstack < maximum_stack then
                nofstack = nofstack + 1
                stack[nofstack] = { level, override }
                level = level + (level % 2 == 1 and 1 or 2) -- least_greater_even(level)
                override = "l"
                entry.level = level
                entry.direction = "bn"
                entry.remove = true
            elseif trace_directions then
                report_directions("stack overflow at position %a with direction %a",i,direction)
            end
        -- X7
        elseif direction == "pdf" then
            -- only pop when something was pushed: the old test (nofstack < maximum_stack)
            -- indexed stack[0] on an unmatched pop and never popped a full stack
            if nofstack > 0 then
                local stacktop = stack[nofstack]
                nofstack = nofstack - 1
                level = stacktop[1]
                override = stacktop[2]
                entry.level = level
                entry.direction = "bn"
                entry.remove = true
            elseif trace_directions then
                report_directions("stack underflow at position %a with direction %a",i,direction)
            end
        -- X6
        else
            entry.level = level
            if override ~= "on" then
                entry.direction = override
            end
        end
    end
    -- X8 (reset states and overrides after paragraph)
end

-- Resolves the weak types in the run start..limit (inclusive); sor and eor are
-- the directions assumed before and after the run.

local function resolve_weak(list,size,start,limit,sor,eor)
    -- W1
    for i=start,limit do
        local entry = list[i]
        if entry.direction == "nsm" then
            if i == start then
                entry.direction = sor
            else
                entry.direction = list[i-1].direction
            end
        end
    end
    -- W2
    for i=start,limit do
        local entry = list[i]
        if entry.direction == "en" then
            for j=i-1,start,-1 do
                local prev = list[j]
                local direction = prev.direction
                if direction == "al" then
                    entry.direction = "an"
                    break
                elseif direction == "r" or direction == "l" then
                    break
                end
            end
        end
    end
    -- W3
    for i=start,limit do
        local entry = list[i]
        if entry.direction == "al" then
            entry.direction = "r"
        end
    end
    -- W4
    for i=start+1,limit-1 do
        local entry = list[i]
        local direction = entry.direction
        if direction == "es" then
            if list[i-1].direction == "en" and list[i+1].direction == "en" then
                entry.direction = "en"
            end
        elseif direction == "cs" then
            local prevdirection = list[i-1].direction
            if prevdirection == "en" then
                if list[i+1].direction == "en" then
                    entry.direction = "en"
                end
            elseif prevdirection == "an" and list[i+1].direction == "an" then
                entry.direction = "an"
            end
        end
    end
    -- W5
    local i = start
    while i <= limit do
        if list[i].direction == "et" then
            local runstart = i
            local runlimit = find_run_limit_et(list,runstart,limit) -- when moved inline we can probably collapse a lot
            local rundirection = runstart == start and sor or list[runstart-1].direction
            if rundirection ~= "en" then
                rundirection = runlimit == limit and eor or list[runlimit+1].direction
            end
            if rundirection == "en" then
                for j=runstart,runlimit do
                    list[j].direction = "en"
                end
            end
            i = runlimit
        end
        i = i + 1
    end
    -- W6
    for i=start,limit do
        local entry = list[i]
        local direction = entry.direction
        if direction == "es" or direction == "et" or direction == "cs" then
            entry.direction = "on"
        end
    end
    -- W7
    for i=start,limit do
        local entry = list[i]
        if entry.direction == "en" then
            local prev_strong = sor
            for j=i-1,start,-1 do
                local direction = list[j].direction
                if direction == "l" or direction == "r" then
                    prev_strong = direction
                    break
                end
            end
            if prev_strong == "l" then
                entry.direction = "l"
            end
        end
    end
end

-- Resolves runs of b, s, ws and on entries in start..limit: a run takes the
-- direction of its neighbours when these agree (en and an count as "r"),
-- otherwise the direction that belongs to the embedding level.

local function resolve_neutral(list,size,start,limit,sor,eor)
    -- N1, N2
    -- a while loop, so that we can really jump over a resolved run (assigning
    -- to the control variable of a numeric for loop has no effect)
    local i = start
    while i <= limit do
        local entry = list[i]
        if b_s_ws_on[entry.direction] then
            local leading_direction, trailing_direction, resolved_direction
            local runstart = i
            local runlimit = find_run_limit_b_s_ws_on(list,runstart,limit)
            if runstart == start then
                leading_direction = sor
            else
                leading_direction = list[runstart-1].direction
                if leading_direction == "en" or leading_direction == "an" then
                    leading_direction = "r"
                end
            end
            if runlimit == limit then
                trailing_direction = eor
            else
                trailing_direction = list[runlimit+1].direction
                if trailing_direction == "en" or trailing_direction == "an" then
                    trailing_direction = "r"
                end
            end
            if leading_direction == trailing_direction then
                -- N1
                resolved_direction = leading_direction
            else
                -- N2 / does the weird period
                resolved_direction = entry.level % 2 == 1 and "r" or "l" -- direction_of_level(entry.level)
            end
            for j=runstart,runlimit do
                list[j].direction = resolved_direction
            end
            i = runlimit
        end
        i = i + 1
    end
end

-- Raises the level of entries whose resolved direction does not match the
-- parity of their embedding level.

local function resolve_implicit(list,size,start,limit,sor,eor)
    -- I1
    for i=start,limit do
        local entry = list[i]
        local level = entry.level
        if level % 2 ~= 1 then -- not odd(level)
            local direction = entry.direction
            if direction == "r" then
                entry.level = level + 1
            elseif direction == "an" or direction == "en" then
                entry.level = level + 2
            end
        end
    end
    -- I2
    for i=start,limit do
        local entry = list[i]
        local level = entry.level
        if level % 2 == 1 then -- odd(level)
            local direction = entry.direction
            if direction == "l" or direction == "en" or direction == "an" then
                entry.level = level + 1
            end
        end
    end
end

-- Splits the list in level runs, resolves weak, neutral and implicit types per
-- run, resets separators and trailing whitespace to the base level and records
-- mirrored characters for entries that end up at an odd level.

local function resolve_levels(list,size,baselevel)
    -- X10
    -- NOTE(review): 'limit' stops at the first entry with another level (or at
    -- 'size') and the next run starts at that same entry, so neighbouring runs
    -- share one entry and a list with one entry is never resolved. This is kept
    -- as it is because the output depends on it; to be checked against the spec.
    local start = 1
    while start < size do
        local level = list[start].level
        local limit = start + 1
        while limit < size and list[limit].level == level do
            limit = limit + 1
        end
        local prev_level = start == 1 and baselevel or list[start-1].level
        local next_level = limit == size and baselevel or list[limit+1].level
        local sor = (level > prev_level and level or prev_level) % 2 == 1 and "r" or "l" -- direction_of_level(max(level,prev_level))
        local eor = (level > next_level and level or next_level) % 2 == 1 and "r" or "l" -- direction_of_level(max(level,next_level))
        -- W1 .. W7
        resolve_weak(list,size,start,limit,sor,eor)
        -- N1 .. N2
        resolve_neutral(list,size,start,limit,sor,eor)
        -- I1 .. I2
        resolve_implicit(list,size,start,limit,sor,eor)
        start = limit
    end
    -- L1
    for i=1,size do
        local entry = list[i]
        local direction = entry.original
        -- (1)
        if direction == "s" or direction == "b" then
            entry.level = baselevel
            -- (2)
            for j=i-1,1,-1 do
                local entry = list[j]
                if whitespace[entry.original] then
                    entry.level = baselevel
                else
                    break
                end
            end
        end
    end
    -- (3)
    for i=size,1,-1 do
        local entry = list[i]
        if whitespace[entry.original] then
            entry.level = baselevel
        else
            break
        end
    end
    -- L4
    for i=1,size do
        local entry = list[i]
        if entry.level % 2 == 1 then -- odd(entry.level)
            local mirror = mirrordata[entry.char]
            if mirror then
                entry.mirror = mirror
            end
        end
    end
end

-- Annotates the entries with begindir/enddir values: for each level from 0 up
-- to the highest one found, every stretch of entries at or above that level is
-- opened on its first entry and closed on its last one. An entry holds only one
-- begindir and one enddir, so a higher level overwrites a lower one.

local function insert_dir_points(list,size)
    -- L2, but no actual reversion is done, we simply annotate where
    -- begindir/endddir node will be inserted.
    local maxlevel = 0
    local finaldir = false
    for i=1,size do
        local level = list[i].level
        if level > maxlevel then
            maxlevel = level
        end
    end
    for level=0,maxlevel do
        local started = false
        local begindir = nil
        local enddir = nil
        if level % 2 == 1 then
            begindir = "+TRT"
            enddir = "-TRT"
        else
            begindir = "+TLT"
            enddir = "-TLT"
        end
        for i=1,size do
            local entry = list[i]
            if entry.level >= level then
                if not started then
                    entry.begindir = begindir
                    started = true
                end
            else
                if started then
                    list[i-1].enddir = enddir
                    started = false
                end
            end
        end
        -- make sure to close the run at end of line
        if started then
            finaldir = enddir
        end
    end
    if finaldir then
        list[size].enddir = finaldir
    end
end

-- Walks the node list in parallel with the annotated list: mirrors glyphs,
-- sets the direction of boxes to 'pardir', injects textdir nodes for the
-- begindir/enddir annotations and, when remove_controls is set, removes the
-- entries flagged for removal. Returns the (possibly new) head and a boolean
-- that is true when nodes were inserted or removed.

local function apply_to_list(list,size,head,pardir)
    local index = 1
    local current = head
    local done = false
    while current do
        if index > size then
            report_directions("fatal error, size mismatch")
            break
        end
        local id = current.id
        local entry = list[index]
        local begindir = entry.begindir
        local enddir = entry.enddir
        if id == glyph_code then
            local mirror = entry.mirror
            if mirror then
                current.char = mirror
            end
            if trace_directions then
                local original = entry.original
                local direction = entry.direction
                if mirror then
                    setcolor(current,"trace:dc")
                elseif direction == "l" then
                    if original == direction then
                        setcolor(current,"trace:dr")
                    else
                        setcolor(current,"trace:dm")
                    end
                elseif direction == "r" then
                    if original == direction then
                        setcolor(current,"trace:db")
                    else
                        setcolor(current,"trace:dg")
                    end
                else
                    resetcolor(current)
                end
            end
        elseif id == hlist_code or id == vlist_code then
            -- current.list = process(current.list) -- not needed
            current.dir = pardir -- is this really needed?
        elseif id == glue_code then
            if enddir and current.subtype == parfillskip_code then
                -- insert the last enddir before \parfillskip glue
                head = insert_node_before(head,current,new_textdir(enddir))
                enddir = false
                done = true
            end
        elseif id == whatsit_code then
            if begindir and current.subtype == localpar_code then
                -- local_par should always be the 1st node
                head, current = insert_node_after(head,current,new_textdir(begindir))
                begindir = nil
                done = true
            end
        end
        if begindir then
            head = insert_node_before(head,current,new_textdir(begindir))
            done = true
        end
        -- move to the last node that is covered by this entry
        local skip = entry.skip
        if skip and skip > 0 then
            for i=1,skip do
                current = current.next
            end
        end
        if enddir then
            head, current = insert_node_after(head,current,new_textdir(enddir))
            done = true
        end
        if not entry.remove then
            current = current.next
        elseif remove_controls then
            -- X9
            head, current = remove_node(head,current,true)
            done = true
        else
            current = current.next
        end
        index = index + 1
    end
    return head, done
end

-- The handler: builds the list, resolves the levels, annotates the direction
-- switches and applies them to the node list. Returns head and done.

local function process(namespace,attribute,head)
    local list, size = build_list(head)
    local baselevel, pardir, dirfound = get_baselevel(head,list,size) -- we always have an inline dir node in context
    if not dirfound and trace_details then
        report_directions("no initial direction found, gambling")
    end
    if trace_details then
        report_directions("before : %s",show_list(list,size,"original"))
    end
    resolve_explicit(list,size,baselevel)
    resolve_levels(list,size,baselevel)
    insert_dir_points(list,size)
    if trace_details then
        report_directions("after  : %s",show_list(list,size,"direction"))
        report_directions("result : %s",show_done(list,size))
    end
    -- keep 'done' local, it used to leak into the global namespace
    local done
    head, done = apply_to_list(list,size,head,pardir)
    return head, done
end

directions.installhandler(interfaces.variables.one,process)