diff options
Diffstat (limited to 'context/data/textadept/context/lexers/scite-context-lexer-bidi.lua')
-rw-r--r-- | context/data/textadept/context/lexers/scite-context-lexer-bidi.lua | 598 |
1 files changed, 598 insertions, 0 deletions
diff --git a/context/data/textadept/context/lexers/scite-context-lexer-bidi.lua b/context/data/textadept/context/lexers/scite-context-lexer-bidi.lua new file mode 100644 index 000000000..ea9c56712 --- /dev/null +++ b/context/data/textadept/context/lexers/scite-context-lexer-bidi.lua @@ -0,0 +1,598 @@ +local info = { + version = 1.002, + comment = "scintilla lpeg lexer for plain text (with spell checking)", + author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", + copyright = "PRAGMA ADE / ConTeXt Development Team", + license = "see context related readme files", +} + +local P, S, Cmt, Cp = lpeg.P, lpeg.S, lpeg.Cmt, lpeg.Cp +local find, match = string.find, string.match + +local lexer = require("scite-context-lexer") +local context = lexer.context +local patterns = context.patterns + +local token = lexer.token + +local bidilexer = lexer.new("bidi","scite-context-lexer-bidi") +local whitespace = bidilexer.whitespace + +local space = patterns.space +local any = patterns.any + +-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- +-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- + +require("char-def") + +characters.directions = { } + +setmetatable(characters.directions,{ __index = function(t,k) + local d = data[k] + if d then + local v = d.direction + if v then + t[k] = v + return v + end + end + t[k] = false -- maybe 'l' + return false +end }) + +characters.mirrors = { } + +setmetatable(characters.mirrors,{ __index = function(t,k) + local d = data[k] + if d then + local v = d.mirror + if v then + t[k] = v + return v + end + end + t[k] = false + return false +end }) + +characters.textclasses = { } + +setmetatable(characters.textclasses,{ __index = function(t,k) + local d = data[k] + if d then + local v = d.textclass + if v then + t[k] = v + return v + end + end + t[k] = false + return false +end }) + +local directiondata = characters.directions +local mirrordata = characters.mirrors +local textclassdata = characters.textclasses + +local maximum_stack = 0xFF -- unicode: 60, will be jumped to 125, we don't care too much +local analyze_fences = false + +local whitespace = { + lre = true, + rle = true, + lro = true, + rlo = true, + pdf = true, + bn = true, + ws = true, +} + +local b_s_ws_on = { + b = true, + s = true, + ws = true, + on = true +} + +local mt_space = { __index = { char = 0x0020, direction = "ws", original = "ws", level = 0 } } +local mt_lre = { __index = { char = 0x202A, direction = "lre", original = "lre", level = 0 } } +local mt_rle = { __index = { char = 0x202B, direction = "rle", original = "rle", level = 0 } } +local mt_pdf = { __index = { char = 0x202C, direction = "pdf", original = "pdf", level = 0 } } +local mt_object = { __index = { char = 0xFFFC, direction = "on", original = "on", level = 0 } } + +local list = { } +local stack = { } + +setmetatable(stack, { __index = function(t,k) local v = { } t[k] = v return v end }) + +local function build_list(head) + -- P1 + local size = 0 + lpegmatch(pattern,head) + return list, size +end + +local function resolve_fences(list,size,start,limit) + -- N0: funny effects, not always better, so it's an option + local nofstack = 0 + for i=start,limit do + local entry = list[i] + if entry.direction == "on" then + local char = entry.char + local mirror = mirrordata[char] + if mirror then + local class = textclassdata[char] + entry.mirror = mirror + entry.class = class + if class == "open" then + nofstack = nofstack + 1 + local stacktop = stack[nofstack] + stacktop[1] = mirror + stacktop[2] = i + stacktop[3] = false -- not used + elseif nofstack == 0 then + -- skip + elseif class == "close" then + while nofstack > 0 do + local stacktop = stack[nofstack] + if stacktop[1] == char then + local open = stacktop[2] + local close = i + list[open ].paired = close + list[close].paired = open + break + else + -- do we mirror or not + end + nofstack = nofstack - 1 + end + end + end + end + end +end + +local function get_baselevel(list,size,direction) + if direction == "TRT" then + return 1, "TRT", true + elseif direction == "TLT" then + return 0, "TLT", true + end + -- P2, P3: + for i=1,size do + local entry = list[i] + local direction = entry.direction + if direction == "r" or direction == "al" then -- and an ? + return 1, "TRT", true + elseif direction == "l" then + return 0, "TLT", true + end + end + return 0, "TLT", false +end + +local function resolve_explicit(list,size,baselevel) +-- if list.rle or list.lre or list.rlo or list.lro then + -- X1 + local level = baselevel + local override = "on" + local nofstack = 0 + for i=1,size do + local entry = list[i] + local direction = entry.direction + -- X2 + if direction == "rle" then + if nofstack < maximum_stack then + nofstack = nofstack + 1 + local stacktop = stack[nofstack] + stacktop[1] = level + stacktop[2] = override + level = level + (level % 2 == 1 and 2 or 1) -- least_greater_odd(level) + override = "on" + entry.level = level + entry.direction = "bn" + entry.remove = true + end + -- X3 + elseif direction == "lre" then + if nofstack < maximum_stack then + nofstack = nofstack + 1 + local stacktop = stack[nofstack] + stacktop[1] = level + stacktop[2] = override + level = level + (level % 2 == 1 and 1 or 2) -- least_greater_even(level) + override = "on" + entry.level = level + entry.direction = "bn" + entry.remove = true + end + -- X4 + elseif direction == "rlo" then + if nofstack < maximum_stack then + nofstack = nofstack + 1 + local stacktop = stack[nofstack] + stacktop[1] = level + stacktop[2] = override + level = level + (level % 2 == 1 and 2 or 1) -- least_greater_odd(level) + override = "r" + entry.level = level + entry.direction = "bn" + entry.remove = true + end + -- X5 + elseif direction == "lro" then + if nofstack < maximum_stack then + nofstack = nofstack + 1 + local stacktop = stack[nofstack] + stacktop[1] = level + stacktop[2] = override + level = level + (level % 2 == 1 and 1 or 2) -- least_greater_even(level) + override = "l" + entry.level = level + entry.direction = "bn" + entry.remove = true + end + -- X7 + elseif direction == "pdf" then + if nofstack < maximum_stack then + local stacktop = stack[nofstack] + level = stacktop[1] + override = stacktop[2] + nofstack = nofstack - 1 + entry.level = level + entry.direction = "bn" + entry.remove = true + end + -- X6 + else + entry.level = level + if override ~= "on" then + entry.direction = override + end + end + end +-- else +-- for i=1,size do +-- list[i].level = baselevel +-- end +-- end + -- X8 (reset states and overrides after paragraph) +end + +local function resolve_weak(list,size,start,limit,orderbefore,orderafter) + -- W1: non spacing marks get the direction of the previous character +-- if list.nsm then + for i=start,limit do + local entry = list[i] + if entry.direction == "nsm" then + if i == start then + entry.direction = orderbefore + else + entry.direction = list[i-1].direction + end + end + end +-- end + -- W2: mess with numbers and arabic +-- if list.en then + for i=start,limit do + local entry = list[i] + if entry.direction == "en" then + for j=i-1,start,-1 do + local prev = list[j] + local direction = prev.direction + if direction == "al" then + entry.direction = "an" + break + elseif direction == "r" or direction == "l" then + break + end + end + end + end +-- end + -- W3 +-- if list.al then + for i=start,limit do + local entry = list[i] + if entry.direction == "al" then + entry.direction = "r" + end + end +-- end + -- W4: make separators number +-- if list.es or list.cs then + -- skip +-- if false then + if false then + for i=start+1,limit-1 do + local entry = list[i] + local direction = entry.direction + if direction == "es" then + if list[i-1].direction == "en" and list[i+1].direction == "en" then + entry.direction = "en" + end + elseif direction == "cs" then + local prevdirection = list[i-1].direction + if prevdirection == "en" then + if list[i+1].direction == "en" then + entry.direction = "en" + end + elseif prevdirection == "an" and list[i+1].direction == "an" then + entry.direction = "an" + end + end + end + else -- only more efficient when we have es/cs + local runner = start + 2 + local before = list[start] + local entry = list[start + 1] + local after = list[runner] + while after do + local direction = entry.direction + if direction == "es" then + if before.direction == "en" and after.direction == "en" then + entry.direction = "en" + end + elseif direction == "cs" then + local prevdirection = before.direction + if prevdirection == "en" then + if after.direction == "en" then + entry.direction = "en" + end + elseif prevdirection == "an" and after.direction == "an" then + entry.direction = "an" + end + end + before = current + current = after + after = list[runner] + runner = runner + 1 + end + end +-- end + -- W5 +-- if list.et then + local i = start + while i <= limit do + if list[i].direction == "et" then + local runstart = i + local runlimit = runstart + for i=runstart,limit do + if list[i].direction == "et" then + runlimit = i + else + break + end + end + local rundirection = runstart == start and sor or list[runstart-1].direction + if rundirection ~= "en" then + rundirection = runlimit == limit and orderafter or list[runlimit+1].direction + end + if rundirection == "en" then + for j=runstart,runlimit do + list[j].direction = "en" + end + end + i = runlimit + end + i = i + 1 + end +-- end + -- W6 +-- if list.es or list.cs or list.et then + for i=start,limit do + local entry = list[i] + local direction = entry.direction + if direction == "es" or direction == "et" or direction == "cs" then + entry.direction = "on" + end + end +-- end + -- W7 + for i=start,limit do + local entry = list[i] + if entry.direction == "en" then + local prev_strong = orderbefore + for j=i-1,start,-1 do + local direction = list[j].direction + if direction == "l" or direction == "r" then + prev_strong = direction + break + end + end + if prev_strong == "l" then + entry.direction = "l" + end + end + end +end + +local function resolve_neutral(list,size,start,limit,orderbefore,orderafter) + -- N1, N2 + for i=start,limit do + local entry = list[i] + if b_s_ws_on[entry.direction] then + -- this needs checking + local leading_direction, trailing_direction, resolved_direction + local runstart = i + local runlimit = runstart + for j=runstart+1,limit do + if b_s_ws_on[list[j].direction] then + runlimit = j + else + break + end + end + if runstart == start then + leading_direction = orderbefore + else + leading_direction = list[runstart-1].direction + if leading_direction == "en" or leading_direction == "an" then + leading_direction = "r" + end + end + if runlimit == limit then + trailing_direction = orderafter + else + trailing_direction = list[runlimit+1].direction + if trailing_direction == "en" or trailing_direction == "an" then + trailing_direction = "r" + end + end + if leading_direction == trailing_direction then + -- N1 + resolved_direction = leading_direction + else + -- N2 / does the weird period + resolved_direction = entry.level % 2 == 1 and "r" or "l" + end + for j=runstart,runlimit do + list[j].direction = resolved_direction + end + i = runlimit + end + i = i + 1 + end +end + +local function resolve_implicit(list,size,start,limit,orderbefore,orderafter,baselevel) + for i=start,limit do + local entry = list[i] + local level = entry.level + local direction = entry.direction + if level % 2 ~= 1 then -- even + -- I1 + if direction == "r" then + entry.level = level + 1 + elseif direction == "an" or direction == "en" then + entry.level = level + 2 + end + else + -- I2 + if direction == "l" or direction == "en" or direction == "an" then + entry.level = level + 1 + end + end + end +end + +local function resolve_levels(list,size,baselevel,analyze_fences) + -- X10 + local start = 1 + while start < size do + local level = list[start].level + local limit = start + 1 + while limit < size and list[limit].level == level do + limit = limit + 1 + end + local prev_level = start == 1 and baselevel or list[start-1].level + local next_level = limit == size and baselevel or list[limit+1].level + local orderbefore = (level > prev_level and level or prev_level) % 2 == 1 and "r" or "l" + local orderafter = (level > next_level and level or next_level) % 2 == 1 and "r" or "l" + -- W1 .. W7 + resolve_weak(list,size,start,limit,orderbefore,orderafter) + -- N0 + if analyze_fences then + resolve_fences(list,size,start,limit) + end + -- N1 .. N2 + resolve_neutral(list,size,start,limit,orderbefore,orderafter) + -- I1 .. I2 + resolve_implicit(list,size,start,limit,orderbefore,orderafter,baselevel) + start = limit + end + -- L1 + for i=1,size do + local entry = list[i] + local direction = entry.original + -- (1) + if direction == "s" or direction == "b" then + entry.level = baselevel + -- (2) + for j=i-1,1,-1 do + local entry = list[j] + if whitespace[entry.original] then + entry.level = baselevel + else + break + end + end + end + end + -- (3) + for i=size,1,-1 do + local entry = list[i] + if whitespace[entry.original] then + entry.level = baselevel + else + break + end + end + -- L4 + if analyze_fences then + for i=1,size do + local entry = list[i] + if entry.level % 2 == 1 then -- odd(entry.level) + if entry.mirror and not entry.paired then + entry.mirror = false + end + -- okay + elseif entry.mirror then + entry.mirror = false + end + end + else + for i=1,size do + local entry = list[i] + if entry.level % 2 == 1 then -- odd(entry.level) + local mirror = mirrordata[entry.char] + if mirror then + entry.mirror = mirror + end + end + end + end +end + +local index = 1 + +local function process(head,direction) + local list, size = build_list(head) + local baselevel = get_baselevel(list,size,direction) -- we always have an inline dir node in context + resolve_explicit(list,size,baselevel) + resolve_levels(list,size,baselevel,analyze_fences) + index = 1 + return list, size +end + +-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- +-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- + +local utf = lexer.helpers.utfbytepattern + +-- local t_start = token("default", utf, function(s,i) if i == 1 then index = 1 process(s) end end)) +-- local t_bidi = token("error", utf / function() index = index + 1 return list[index].direction == "r" end) +-- local t_rest = token("default", any) + +-- bidilexer._rules = { +-- { "start", t_start }, +-- { "bidi", t_bidi }, +-- { "rest", t_rest }, +-- } + +bidilexer._grammar = #utf * function(s,i) + process(s) + local t = { } + local n = 0 + for i=1,size do + n = n + 1 t[n] = i + n = n + 1 t[n] = "error" + end + return t +end + +bidilexer._tokenstyles = context.styleset + +return bidilexer |