summaryrefslogtreecommitdiff
path: root/context/data/scite/context/scite-ctx-bidi.lua
diff options
context:
space:
mode:
Diffstat (limited to 'context/data/scite/context/scite-ctx-bidi.lua')
-rw-r--r--context/data/scite/context/scite-ctx-bidi.lua536
1 files changed, 536 insertions, 0 deletions
diff --git a/context/data/scite/context/scite-ctx-bidi.lua b/context/data/scite/context/scite-ctx-bidi.lua
new file mode 100644
index 000000000..ab64da70d
--- /dev/null
+++ b/context/data/scite/context/scite-ctx-bidi.lua
@@ -0,0 +1,536 @@
+-- Registers this file in the global 'modules' table. The constructor used to
+-- carry two 'comment' keys; the order of assignments in a table constructor is
+-- undefined, so one of the two values was silently lost. The value that won in
+-- practice (the last one) stays as the field, the other remark is now a plain
+-- source comment.
+if not modules then modules = { } end modules ['scite-ctx-bidi'] = {
+    version   = 1.001,
+    -- companion to scite-ctx.lua
+    comment   = "Unicode bidi (sort of) variant c",
+    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
+    copyright = "PRAGMA ADE / ConTeXt Development Team",
+    license   = "see context related readme files",
+}
+
+-- Partial comment from typo-duc.lua:
+--
+-- This is a follow up on typo-dua which itself is a follow up on t-bidi by Khaled Hosny which
+-- in turn is based on minibidi.c from Arabeyes. This is a further optimization, as well as
+-- an update on some recent unicode bidi developments. There is (and will) also be more control
+-- added. As a consequence this module is somewhat slower than its precursor which itself is
+-- slower than the one-pass bidi handler. This is also a playground and I might add some plugin
+-- support. However, in the meantime performance got a bit better and this third variant is again
+-- some 10% faster than the second variant.
+--
+-- ... some patches and updates applied
+-- ... some code can be removed
+-- ... has to be kept in sync with context
+-- ... mtxrun --script interface
+--
+-- ... this feature is more fun than useful
+-- ... this way we can use it to check what scite / uniscribe sees (as scintilla is weak on
+-- bidi selection)
+
+local setmetatable = setmetatable
+
+-- character property tables that come with the lexer data
+local data = require("context.lexers.data.scite-context-data-bidi")
+
+local directiondata, mirrordata, textclassdata = data.directions, data.mirrors, data.textclasses
+
+-- setmetatable(directiondata,{ __index = function(t,k) local v = "l" t[k] = v return v end })
+
+local maximum_stack  = 0xFF  -- unicode: 60, will be jumped to 125, we don't care too much
+local analyze_fences = false -- handed to resolve_levels by process (N0 and L4)
+
+-- original classes whose level is reset to the base level in the L1 pass of resolve_levels
+local whitespace = { lre = true, rle = true, lro = true, rlo = true, pdf = true, bn = true, ws = true }
+
+-- classes that resolve_neutral treats as members of a neutral run
+local b_s_ws_on = { b = true, s = true, ws = true, on = true }
+
+-- entries made for a space get their fields from this shared prototype
+local mt_space = {
+    __index = { char = 0x0020, direction = "ws", original = "ws", level = 0 },
+}
+----- mt_lre = { __index = { char = 0x202A, direction = "lre", original = "lre", level = 0 } }
+----- mt_rle = { __index = { char = 0x202B, direction = "rle", original = "rle", level = 0 } }
+----- mt_pdf = { __index = { char = 0x202C, direction = "pdf", original = "pdf", level = 0 } }
+----- mt_object = { __index = { char = 0xFFFC, direction = "on", original = "on", level = 0 } }
+
+-- shared scratch stack: a slot (an empty table) is created the first time an index is read
+local stack = setmetatable({ }, {
+    __index = function(slots,index)
+        local slot = { }
+        slots[index] = slot
+        return slot
+    end,
+})
+
+-- P1: turns the given array in place into an array of entry tables and returns
+-- that same array plus its length. An item equal to " " becomes an empty table
+-- that inherits its fields from mt_space; any other item gets its class from
+-- directiondata (falling back to "l") stored as both 'direction' and 'original',
+-- with level 0.
+local function build_list(list)
+    local size = #list
+    for index=1,size do
+        local character = list[index]
+        local entry
+        if character ~= " " then
+            local class = directiondata[character] or "l"
+            entry = { char = character, direction = class, original = class, level = 0 }
+        else
+            entry = setmetatable({ },mt_space)
+        end
+        list[index] = entry
+    end
+    return list, size
+end
+
+-- N0: funny effects, not always better, so it's an option
+--
+-- Looks at the entries start..limit whose direction is "on" and that have a
+-- mirror in mirrordata. Each of those gets 'mirror' and 'class' fields. An
+-- "open" one is pushed on the shared stack (its mirror and its index); a "close"
+-- one walks down the stack, dropping slots until one holds the closing character
+-- as mirror, and then links both entries through their 'paired' fields. The
+-- matching slot itself is left on the stack.
+local function resolve_fences(list,size,start,limit)
+    local depth = 0
+    for index=start,limit do
+        local entry = list[index]
+        if entry.direction == "on" then
+            local char   = entry.char
+            local mirror = mirrordata[char]
+            if mirror then
+                local class = textclassdata[char]
+                entry.mirror = mirror
+                entry.class  = class
+                if class == "open" then
+                    depth = depth + 1
+                    local slot = stack[depth]
+                    slot[1] = mirror
+                    slot[2] = index
+                    slot[3] = false -- not used
+                elseif class == "close" and depth > 0 then
+                    repeat
+                        local slot = stack[depth]
+                        if slot[1] == char then
+                            local opener = slot[2]
+                            list[opener].paired = index
+                            list[index ].paired = opener
+                            break
+                        end
+                        -- do we mirror or not
+                        depth = depth - 1
+                    until depth == 0
+                end
+            end
+        end
+    end
+end
+
+-- Determines the paragraph level. Returns three values: the level (0 or 1), the
+-- matching direction name ("TLT" or "TRT") and a boolean telling whether the
+-- level was actually determined (forced or found) instead of defaulted.
+--
+-- An explicit direction of "TRT" or "TLT" wins; otherwise (P2, P3) the first
+-- entry with class "l", "r" or "al" decides, and 0, "TLT", false is the fallback.
+local function get_baselevel(list,size,direction)
+    local forced = direction == "TRT" and 1 or direction == "TLT" and 0 or nil
+    if forced then
+        return forced, direction, true
+    end
+    -- P2, P3:
+    local index = 1
+    while index <= size do
+        local class = list[index].direction
+        if class == "l" then
+            return 0, "TLT", true
+        elseif class == "r" or class == "al" then -- and an ?
+            return 1, "TRT", true
+        end
+        index = index + 1
+    end
+    return 0, "TLT", false
+end
+
+-- The four embedding / override initiators: { new level must be odd, override }.
+-- An override of "on" means that entries keep their own class.
+local explicit_initiators = {
+    rle = { true,  "on" }, -- X2
+    lre = { false, "on" }, -- X3
+    rlo = { true,  "r"  }, -- X4
+    lro = { false, "l"  }, -- X5
+}
+
+-- X1 .. X7: assigns embedding levels, starting at 'baselevel', and applies
+-- directional overrides to the entries 1..size of 'list' (modified in place).
+--
+-- An initiator pushes the current level and override on the shared stack (as
+-- long as fewer than maximum_stack are open), bumps the level to the next odd
+-- or even value and is itself turned into a removable "bn" entry. A "pdf" pops
+-- that state again. Every other entry gets the current level and, when an
+-- override is active, the override as its direction.
+--
+-- Fixed: the "pdf" branch used to test 'nofstack < maximum_stack'. With an empty
+-- stack that read the auto-created slot 0, made level and override nil and
+-- drove the counter negative, so later level arithmetic failed; at full depth
+-- the pop never happened. A "pdf" without an open embedding is now left alone
+-- (X7), just like an initiator that does not fit on the stack.
+local function resolve_explicit(list,size,baselevel)
+    -- X1
+    local level    = baselevel
+    local override = "on"
+    local nofstack = 0
+    for i=1,size do
+        local entry     = list[i]
+        local direction = entry.direction
+        local initiator = explicit_initiators[direction]
+        if initiator then
+            -- X2 .. X5
+            if nofstack < maximum_stack then
+                nofstack = nofstack + 1
+                local stacktop = stack[nofstack]
+                stacktop[1] = level
+                stacktop[2] = override
+                if initiator[1] then
+                    level = level + (level % 2 == 1 and 2 or 1) -- least_greater_odd(level)
+                else
+                    level = level + (level % 2 == 1 and 1 or 2) -- least_greater_even(level)
+                end
+                override        = initiator[2]
+                entry.level     = level
+                entry.direction = "bn"
+                entry.remove    = true
+            end
+        elseif direction == "pdf" then
+            -- X7
+            if nofstack > 0 then
+                local stacktop  = stack[nofstack]
+                level           = stacktop[1]
+                override        = stacktop[2]
+                nofstack        = nofstack - 1
+                entry.level     = level
+                entry.direction = "bn"
+                entry.remove    = true
+            end
+        else
+            -- X6
+            entry.level = level
+            if override ~= "on" then
+                entry.direction = override
+            end
+        end
+    end
+    -- X8 (reset states and overrides after paragraph)
+end
+
+-- W1 .. W7: resolves the weak classes of the entries start..limit of 'list' in
+-- place. 'orderbefore' and 'orderafter' are the directions assumed just before
+-- 'start' and just after 'limit'; 'size' is not used here.
+--
+-- Fixed:
+--   W4  the loop tested 'entry' but advanced the undeclared global 'current',
+--       so only list[start+1] was ever looked at (and a global leaked); it also
+--       ran on past 'limit'. It is now a plain indexed loop over the inner
+--       entries of the run.
+--   W5  the direction before a run at 'start' came from the undefined global
+--       'sor'; it is 'orderbefore' now. The inner loop no longer shadows 'i'.
+local function resolve_weak(list,size,start,limit,orderbefore,orderafter)
+    -- W1: non spacing marks get the direction of the previous character
+    for i=start,limit do
+        local entry = list[i]
+        if entry.direction == "nsm" then
+            if i == start then
+                entry.direction = orderbefore
+            else
+                entry.direction = list[i-1].direction
+            end
+        end
+    end
+    -- W2: mess with numbers and arabic: an "en" becomes "an" when the nearest
+    -- preceding strong class in the run is "al"
+    for i=start,limit do
+        local entry = list[i]
+        if entry.direction == "en" then
+            for j=i-1,start,-1 do
+                local direction = list[j].direction
+                if direction == "al" then
+                    entry.direction = "an"
+                    break
+                elseif direction == "r" or direction == "l" then
+                    break
+                end
+            end
+        end
+    end
+    -- W3
+    for i=start,limit do
+        local entry = list[i]
+        if entry.direction == "al" then
+            entry.direction = "r"
+        end
+    end
+    -- W4: make separators number: a single "es" between two "en", or a single
+    -- "cs" between two "en" or two "an", takes the class of its neighbours
+    for i=start+1,limit-1 do
+        local entry     = list[i]
+        local direction = entry.direction
+        if direction == "es" then
+            if list[i-1].direction == "en" and list[i+1].direction == "en" then
+                entry.direction = "en"
+            end
+        elseif direction == "cs" then
+            local prevdirection = list[i-1].direction
+            if prevdirection == "en" then
+                if list[i+1].direction == "en" then
+                    entry.direction = "en"
+                end
+            elseif prevdirection == "an" and list[i+1].direction == "an" then
+                entry.direction = "an"
+            end
+        end
+    end
+    -- W5: a run of "et" next to an "en" becomes "en"
+    local i = start
+    while i <= limit do
+        if list[i].direction == "et" then
+            local runstart = i
+            local runlimit = runstart
+            for j=runstart,limit do
+                if list[j].direction == "et" then
+                    runlimit = j
+                else
+                    break
+                end
+            end
+            local rundirection = runstart == start and orderbefore or list[runstart-1].direction
+            if rundirection ~= "en" then
+                rundirection = runlimit == limit and orderafter or list[runlimit+1].direction
+            end
+            if rundirection == "en" then
+                for j=runstart,runlimit do
+                    list[j].direction = "en"
+                end
+            end
+            i = runlimit -- continue after the run
+        end
+        i = i + 1
+    end
+    -- W6: what is left of the separators and terminators becomes neutral
+    for i=start,limit do
+        local entry     = list[i]
+        local direction = entry.direction
+        if direction == "es" or direction == "et" or direction == "cs" then
+            entry.direction = "on"
+        end
+    end
+    -- W7: an "en" becomes "l" when the nearest preceding strong class (or
+    -- orderbefore when there is none in the run) is "l"
+    for i=start,limit do
+        local entry = list[i]
+        if entry.direction == "en" then
+            local prev_strong = orderbefore
+            for j=i-1,start,-1 do
+                local direction = list[j].direction
+                if direction == "l" or direction == "r" then
+                    prev_strong = direction
+                    break
+                end
+            end
+            if prev_strong == "l" then
+                entry.direction = "l"
+            end
+        end
+    end
+end
+
+-- N1, N2: resolves runs of neutral entries (classes listed in b_s_ws_on) within
+-- start..limit of 'list' in place. 'orderbefore' and 'orderafter' stand in for
+-- the neighbours of a run that touches 'start' or 'limit'; 'size' is not used.
+--
+-- The original used a numeric 'for' and assigned to its control variable in
+-- order to jump over a resolved run. Such an assignment does not influence the
+-- iteration (and the manual tells not to do it), so the jump never happened.
+-- A 'while' loop now performs the intended skip; the outcome is the same as a
+-- resolved run never holds a neutral class any more.
+local function resolve_neutral(list,size,start,limit,orderbefore,orderafter)
+    local i = start
+    while i <= limit do
+        local entry = list[i]
+        if b_s_ws_on[entry.direction] then
+            -- this needs checking
+            local leading_direction, trailing_direction, resolved_direction
+            local runstart = i
+            local runlimit = runstart
+            for j=runstart+1,limit do
+                if b_s_ws_on[list[j].direction] then
+                    runlimit = j
+                else
+                    break
+                end
+            end
+            -- numbers on either side count as "r"
+            if runstart == start then
+                leading_direction = orderbefore
+            else
+                leading_direction = list[runstart-1].direction
+                if leading_direction == "en" or leading_direction == "an" then
+                    leading_direction = "r"
+                end
+            end
+            if runlimit == limit then
+                trailing_direction = orderafter
+            else
+                trailing_direction = list[runlimit+1].direction
+                if trailing_direction == "en" or trailing_direction == "an" then
+                    trailing_direction = "r"
+                end
+            end
+            if leading_direction == trailing_direction then
+                -- N1
+                resolved_direction = leading_direction
+            else
+                -- N2 / does the weird period: follow the level of the first entry of the run
+                resolved_direction = entry.level % 2 == 1 and "r" or "l"
+            end
+            for j=runstart,runlimit do
+                list[j].direction = resolved_direction
+            end
+            i = runlimit -- continue after the run
+        end
+        i = i + 1
+    end
+end
+
+-- I1, I2: raises the level of the entries start..limit of 'list' depending on
+-- their resolved class. At an even level "r" adds one and "an" / "en" add two;
+-- at an odd level "l", "en" and "an" add one. Other entries are not touched.
+-- The remaining parameters are not used here.
+local function resolve_implicit(list,size,start,limit,orderbefore,orderafter,baselevel)
+    for index=start,limit do
+        local entry     = list[index]
+        local class     = entry.direction
+        local level     = entry.level
+        local numeric   = class == "an" or class == "en"
+        local increment = 0
+        if level % 2 == 1 then
+            -- I2
+            if numeric or class == "l" then
+                increment = 1
+            end
+        elseif numeric then
+            -- I1
+            increment = 2
+        elseif class == "r" then
+            -- I1
+            increment = 1
+        end
+        if increment > 0 then
+            entry.level = level + increment
+        end
+    end
+end
+
+-- Splits the list into stretches of equal level, runs the weak (W1 .. W7),
+-- optional fence (N0), neutral (N1 .. N2) and implicit (I1 .. I2) passes over
+-- each stretch and then applies L1 (level resets) and L4 (mirroring), all in
+-- place on the entries of 'list'.
+--
+--   list, size     : the entries and their count
+--   baselevel      : the paragraph level, also used as the level outside the list
+--   analyze_fences : when true resolve_fences is run and L4 only keeps mirrors it paired
+--
+-- NOTE(review): the outer loop only runs while start < size, so a list with a
+-- single entry never reaches the resolve_* passes; confirm that this is intended.
+-- NOTE(review): 'limit' stops at the first index whose level differs (or at
+-- 'size') and the next stretch starts at that same index, so that entry is
+-- handed to the passes twice and belongs to a stretch of another level; this
+-- looks like it has to stay in sync with the context version, confirm before
+-- changing it.
+local function resolve_levels(list,size,baselevel,analyze_fences)
+ -- X10
+ local start = 1
+ while start < size do
+ local level = list[start].level
+ local limit = start + 1
+ while limit < size and list[limit].level == level do
+ limit = limit + 1
+ end
+ -- levels just outside the stretch; the base level stands in at both ends of the list
+ local prev_level = start == 1 and baselevel or list[start-1].level
+ local next_level = limit == size and baselevel or list[limit+1].level
+ -- the higher of the two levels at each edge decides: odd gives "r", even gives "l"
+ local orderbefore = (level > prev_level and level or prev_level) % 2 == 1 and "r" or "l"
+ local orderafter = (level > next_level and level or next_level) % 2 == 1 and "r" or "l"
+ -- W1 .. W7
+ resolve_weak(list,size,start,limit,orderbefore,orderafter)
+ -- N0
+ if analyze_fences then
+ resolve_fences(list,size,start,limit)
+ end
+ -- N1 .. N2
+ resolve_neutral(list,size,start,limit,orderbefore,orderafter)
+ -- I1 .. I2
+ resolve_implicit(list,size,start,limit,orderbefore,orderafter,baselevel)
+ start = limit
+ end
+ -- L1
+ for i=1,size do
+ local entry = list[i]
+ local direction = entry.original
+ -- (1) entries that originally were "s" or "b" go back to the base level
+ if direction == "s" or direction == "b" then
+ entry.level = baselevel
+ -- (2) and so do the whitespace-like entries directly in front of them
+ for j=i-1,1,-1 do
+ local entry = list[j]
+ if whitespace[entry.original] then
+ entry.level = baselevel
+ else
+ break
+ end
+ end
+ end
+ end
+ -- (3) the same for whitespace-like entries at the end of the list
+ for i=size,1,-1 do
+ local entry = list[i]
+ if whitespace[entry.original] then
+ entry.level = baselevel
+ else
+ break
+ end
+ end
+ -- L4
+ if analyze_fences then
+ -- resolve_fences has set the mirrors: drop them at even levels and for unpaired entries
+ for i=1,size do
+ local entry = list[i]
+ if entry.level % 2 == 1 then
+ if entry.mirror and not entry.paired then
+ entry.mirror = false
+ end
+ elseif entry.mirror then
+ entry.mirror = false
+ end
+ end
+ else
+ -- no fence analysis: every entry at an odd level gets its mirror, when it has one
+ for i=1,size do
+ local entry = list[i]
+ if entry.level % 2 == 1 then
+ local mirror = mirrordata[entry.char]
+ if mirror then
+ entry.mirror = mirror
+ end
+ end
+ end
+ end
+end
+
+-- Entry point: runs the bidi passes over 'head', an array of items that
+-- build_list converts in place into entry tables. 'direction' may force the
+-- paragraph level ("TRT" or "TLT"); otherwise get_baselevel derives it from the
+-- entries. Returns the (same) array and its length.
+local function process(head,direction)
+    local entries, count = build_list(head)
+    -- get_baselevel returns three values, only the level is needed here
+    local base = get_baselevel(entries,count,direction)
+    resolve_explicit(entries,count,base)
+    resolve_levels(entries,count,base,analyze_fences)
+    return entries, count
+end
+
+-- the module exports the entry point only
+return { process = process }