diff options
author | Hans Hagen <pragma@wxs.nl> | 2021-10-08 20:46:55 +0200 |
---|---|---|
committer | Context Git Mirror Bot <phg@phi-gamma.net> | 2021-10-08 20:46:55 +0200 |
commit | 778f381ba6a448ab00d67994a412dd4226d43238 (patch) | |
tree | d9dade45016a572e6c22521bfb165f9829ac3192 /context/data/textadept/context/lexers/scite-context-lexer-bidi.lua | |
parent | 2073fe5d88215dddd9a9e6421afaea7ab7db955a (diff) | |
download | context-778f381ba6a448ab00d67994a412dd4226d43238.tar.gz |
2021-10-08 20:07:00
Diffstat (limited to 'context/data/textadept/context/lexers/scite-context-lexer-bidi.lua')
-rw-r--r-- | context/data/textadept/context/lexers/scite-context-lexer-bidi.lua | 598 |
1 files changed, 0 insertions, 598 deletions
-- Recovered from: diff --git a/context/data/textadept/context/lexers/scite-context-lexer-bidi.lua
-- (deleted file mode 100644, index ea9c56712..000000000, hunk @@ -1,598 +0,0 @@)

local info = {
    version   = 1.002,
    comment   = "scintilla lpeg lexer for plain text (with spell checking)",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files",
}

local P, S, Cmt, Cp = lpeg.P, lpeg.S, lpeg.Cmt, lpeg.Cp
local find, match = string.find, string.match

local lexer       = require("scite-context-lexer")
local context     = lexer.context
local patterns    = context.patterns

local token       = lexer.token

local bidilexer   = lexer.new("bidi","scite-context-lexer-bidi")
local whitespace  = bidilexer.whitespace

local space       = patterns.space
local any         = patterns.any

-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --
-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --

require("char-def")

-- The lookups below used to index an undefined global 'data', which raises
-- "attempt to index a nil value" on the first access.
-- NOTE(review): char-def is expected to fill characters.data with one record
-- per code point (fields direction, mirror, textclass) -- confirm against the
-- char-def.lua that ships next to this lexer.
local data = characters.data

-- Returns a table that lazily resolves data[k][field] and caches the outcome
-- in itself; code points without a record or without that field are cached
-- as false, so every key is looked up in 'data' only once.
local function cached_property(field)
    return setmetatable({ }, { __index = function(t,k)
        local d = data[k]
        local v = d and d[field] or false -- for 'direction': maybe 'l'
        t[k] = v
        return v
    end })
end

characters.directions  = cached_property("direction")
characters.mirrors     = cached_property("mirror")
characters.textclasses = cached_property("textclass")

local directiondata = characters.directions
local mirrordata    = characters.mirrors
local textclassdata = characters.textclasses
local byte, gmatch = string.byte, string.gmatch

local maximum_stack  = 0xFF -- unicode: 60, will be jumped to 125, we don't care too much
local analyze_fences = false

-- original classes whose level is reset to the base level by rule L1
local whitespace = {
    lre = true,
    rle = true,
    lro = true,
    rlo = true,
    pdf = true,
    bn  = true,
    ws  = true,
}

-- classes that resolve_neutral collects into one neutral run (N1, N2)
local b_s_ws_on = {
    b   = true,
    s   = true,
    ws  = true,
    on  = true
}

-- prototypes for synthetic entries; nothing in this part of the file uses them
local mt_space  = { __index = { char = 0x0020, direction = "ws",  original = "ws",  level = 0 } }
local mt_lre    = { __index = { char = 0x202A, direction = "lre", original = "lre", level = 0 } }
local mt_rle    = { __index = { char = 0x202B, direction = "rle", original = "rle", level = 0 } }
local mt_pdf    = { __index = { char = 0x202C, direction = "pdf", original = "pdf", level = 0 } }
local mt_object = { __index = { char = 0xFFFC, direction = "on",  original = "on",  level = 0 } }

local list  = { }
local stack = { }

-- stack slots are created on demand, so stack[n] is always a table
setmetatable(stack, { __index = function(t,k) local v = { } t[k] = v return v end })

-- Decodes the first code point of a string that starts with a utf-8 lead byte;
-- missing continuation bytes count as zero bits instead of raising an error.
local function utf8_codepoint(sequence)
    local b1, b2, b3, b4 = byte(sequence,1,4)
    if b1 < 0xC0 then
        return b1
    elseif b1 < 0xE0 then
        return (b1 % 0x20) * 0x40 + (b2 or 0x80) % 0x40
    elseif b1 < 0xF0 then
        return ((b1 % 0x10) * 0x40 + (b2 or 0x80) % 0x40) * 0x40 + (b3 or 0x80) % 0x40
    else
        return (((b1 % 0x08) * 0x40 + (b2 or 0x80) % 0x40) * 0x40 + (b3 or 0x80) % 0x40) * 0x40 + (b4 or 0x80) % 0x40
    end
end

-- Fills the shared 'list' with one entry per utf-8 character of the string
-- 'head' and returns list, size. The entry fields (char, direction, original,
-- level) are the ones the mt_* prototypes above carry. This used to call the
-- undefined names 'lpegmatch' and 'pattern' and always reported size 0.
local function build_list(head)
    -- P1
    local size = 0
    for sequence in gmatch(head,"[^\128-\191][\128-\191]*") do
        local char      = utf8_codepoint(sequence)
        local direction = directiondata[char]
        size = size + 1
        list[size] = { char = char, direction = direction, original = direction, level = 0 }
    end
    -- the list is shared between calls, so drop what a longer text left behind
    for i=#list,size+1,-1 do
        list[i] = nil
    end
    return list, size
end

-- Marks mirrored "on" entries in list[start..limit] with their mirror and
-- text class and links an "open" entry with a later "close" entry (field
-- 'paired') when the close character equals the mirror stored for the open.
local function resolve_fences(list,size,start,limit)
    -- N0: funny effects, not always better, so it's an option
    local nofstack = 0
    for i=start,limit do
        local entry = list[i]
        if entry.direction == "on" then
            local char   = entry.char
            local mirror = mirrordata[char]
            if mirror then
                local class = textclassdata[char]
                entry.mirror = mirror
                entry.class  = class
                if class == "open" then
                    nofstack       = nofstack + 1
                    local stacktop = stack[nofstack]
                    stacktop[1]    = mirror
                    stacktop[2]    = i
                    stacktop[3]    = false -- not used
                elseif nofstack == 0 then
                    -- skip
                elseif class == "close" then
                    while nofstack > 0 do
                        local stacktop = stack[nofstack]
                        if stacktop[1] == char then
                            local open  = stacktop[2]
                            local close = i
                            list[open ].paired = close
                            list[close].paired = open
                            break
                        else
                            -- do we mirror or not
                        end
                        nofstack = nofstack - 1
                    end
                end
            end
        end
    end
end

-- Returns level, direction name, found: an explicit "TRT" or "TLT" wins,
-- otherwise the first strong entry decides, and without one the result is
-- 0, "TLT", false.
local function get_baselevel(list,size,direction)
    if direction == "TRT" then
        return 1, "TRT", true
    elseif direction == "TLT" then
        return 0, "TLT", true
    end
    -- P2, P3:
    for i=1,size do
        local entry     = list[i]
        local direction = entry.direction
        if direction == "r" or direction == "al" then -- and an ?
            return 1, "TRT", true
        elseif direction == "l" then
            return 0, "TLT", true
        end
    end
    return 0, "TLT", false
end

-- Assigns embedding levels (X1 .. X7): rle/lre/rlo/lro push the current
-- level and override, pdf pops them again, and every other entry gets the
-- current level plus the active override as its direction.
local function resolve_explicit(list,size,baselevel)
    -- X1
    local level    = baselevel
    local override = "on"
    local nofstack = 0
    for i=1,size do
        local entry     = list[i]
        local direction = entry.direction
        -- X2
        if direction == "rle" then
            if nofstack < maximum_stack then
                nofstack        = nofstack + 1
                local stacktop  = stack[nofstack]
                stacktop[1]     = level
                stacktop[2]     = override
                level           = level + (level % 2 == 1 and 2 or 1) -- least_greater_odd(level)
                override        = "on"
                entry.level     = level
                entry.direction = "bn"
                entry.remove    = true
            end
        -- X3
        elseif direction == "lre" then
            if nofstack < maximum_stack then
                nofstack        = nofstack + 1
                local stacktop  = stack[nofstack]
                stacktop[1]     = level
                stacktop[2]     = override
                level           = level + (level % 2 == 1 and 1 or 2) -- least_greater_even(level)
                override        = "on"
                entry.level     = level
                entry.direction = "bn"
                entry.remove    = true
            end
        -- X4
        elseif direction == "rlo" then
            if nofstack < maximum_stack then
                nofstack        = nofstack + 1
                local stacktop  = stack[nofstack]
                stacktop[1]     = level
                stacktop[2]     = override
                level           = level + (level % 2 == 1 and 2 or 1) -- least_greater_odd(level)
                override        = "r"
                entry.level     = level
                entry.direction = "bn"
                entry.remove    = true
            end
        -- X5
        elseif direction == "lro" then
            if nofstack < maximum_stack then
                nofstack        = nofstack + 1
                local stacktop  = stack[nofstack]
                stacktop[1]     = level
                stacktop[2]     = override
                level           = level + (level % 2 == 1 and 1 or 2) -- least_greater_even(level)
                override        = "l"
                entry.level     = level
                entry.direction = "bn"
                entry.remove    = true
            end
        -- X7
        elseif direction == "pdf" then
            -- only pop when something was pushed: the old test against
            -- maximum_stack read the empty slot stack[0] for an unmatched pdf
            -- and thereby turned the level into nil
            if nofstack > 0 then
                local stacktop = stack[nofstack]
                level          = stacktop[1]
                override       = stacktop[2]
                nofstack       = nofstack - 1
            end
            entry.level     = level
            entry.direction = "bn"
            entry.remove    = true
        -- X6
        else
            entry.level = level
            if override ~= "on" then
                entry.direction = override
            end
        end
    end
    -- X8 (reset states and overrides after paragraph)
end

-- Resolves the weak classes (W1 .. W7) of list[start..limit], both bounds
-- included; orderbefore and orderafter are the "l" or "r" directions that
-- resolve_levels computed for the two edges of the run.
local function resolve_weak(list,size,start,limit,orderbefore,orderafter)
    -- W1: non spacing marks get the direction of the previous character
    for i=start,limit do
        local entry = list[i]
        if entry.direction == "nsm" then
            if i == start then
                entry.direction = orderbefore
            else
                entry.direction = list[i-1].direction
            end
        end
    end
    -- W2: mess with numbers and arabic
    for i=start,limit do
        local entry = list[i]
        if entry.direction == "en" then
            for j=i-1,start,-1 do
                local prev      = list[j]
                local direction = prev.direction
                if direction == "al" then
                    entry.direction = "an"
                    break
                elseif direction == "r" or direction == "l" then
                    break
                end
            end
        end
    end
    -- W3
    for i=start,limit do
        local entry = list[i]
        if entry.direction == "al" then
            entry.direction = "r"
        end
    end
    -- W4: make separators number
    -- (the sliding window variant that was active here assigned to an
    -- undefined 'current', never advanced its entry and ran past limit)
    for i=start+1,limit-1 do
        local entry     = list[i]
        local direction = entry.direction
        if direction == "es" then
            if list[i-1].direction == "en" and list[i+1].direction == "en" then
                entry.direction = "en"
            end
        elseif direction == "cs" then
            local prevdirection = list[i-1].direction
            if prevdirection == "en" then
                if list[i+1].direction == "en" then
                    entry.direction = "en"
                end
            elseif prevdirection == "an" and list[i+1].direction == "an" then
                entry.direction = "an"
            end
        end
    end
    -- W5
    local i = start
    while i <= limit do
        if list[i].direction == "et" then
            local runstart = i
            local runlimit = runstart
            for i=runstart,limit do
                if list[i].direction == "et" then
                    runlimit = i
                else
                    break
                end
            end
            -- was 'sor', an undefined name, which made a run at the start of
            -- the list index list[start-1]
            local rundirection = runstart == start and orderbefore or list[runstart-1].direction
            if rundirection ~= "en" then
                rundirection = runlimit == limit and orderafter or list[runlimit+1].direction
            end
            if rundirection == "en" then
                for j=runstart,runlimit do
                    list[j].direction = "en"
                end
            end
            i = runlimit
        end
        i = i + 1
    end
    -- W6
    for i=start,limit do
        local entry     = list[i]
        local direction = entry.direction
        if direction == "es" or direction == "et" or direction == "cs" then
            entry.direction = "on"
        end
    end
    -- W7
    for i=start,limit do
        local entry = list[i]
        if entry.direction == "en" then
            local prev_strong = orderbefore
            for j=i-1,start,-1 do
                local direction = list[j].direction
                if direction == "l" or direction == "r" then
                    prev_strong = direction
                    break
                end
            end
            if prev_strong == "l" then
                entry.direction = "l"
            end
        end
    end
end

-- Gives each run of b/s/ws/on entries in list[start..limit] the direction of
-- its surroundings when both sides agree (N1) and the direction of the
-- embedding level otherwise (N2); en and an neighbours count as "r".
local function resolve_neutral(list,size,start,limit,orderbefore,orderafter)
    -- N1, N2
    -- a while loop, because assigning to the control variable of a numeric
    -- for does not move the loop, so the jump over a handled run was lost
    local i = start
    while i <= limit do
        local entry = list[i]
        if b_s_ws_on[entry.direction] then
            -- this needs checking
            local leading_direction, trailing_direction, resolved_direction
            local runstart = i
            local runlimit = runstart
            for j=runstart+1,limit do
                if b_s_ws_on[list[j].direction] then
                    runlimit = j
                else
                    break
                end
            end
            if runstart == start then
                leading_direction = orderbefore
            else
                leading_direction = list[runstart-1].direction
                if leading_direction == "en" or leading_direction == "an" then
                    leading_direction = "r"
                end
            end
            if runlimit == limit then
                trailing_direction = orderafter
            else
                trailing_direction = list[runlimit+1].direction
                if trailing_direction == "en" or trailing_direction == "an" then
                    trailing_direction = "r"
                end
            end
            if leading_direction == trailing_direction then
                -- N1
                resolved_direction = leading_direction
            else
                -- N2 / does the weird period
                resolved_direction = entry.level % 2 == 1 and "r" or "l"
            end
            for j=runstart,runlimit do
                list[j].direction = resolved_direction
            end
            i = runlimit
        end
        i = i + 1
    end
end

-- Raises the level of the entries in list[start..limit] according to their
-- resolved direction (I1 for even levels, I2 for odd levels).
local function resolve_implicit(list,size,start,limit,orderbefore,orderafter,baselevel)
    for i=start,limit do
        local entry     = list[i]
        local level     = entry.level
        local direction = entry.direction
        if level % 2 ~= 1 then -- even
            -- I1
            if direction == "r" then
                entry.level = level + 1
            elseif direction == "an" or direction == "en" then
                entry.level = level + 2
            end
        else
            -- I2
            if direction == "l" or direction == "en" or direction == "an" then
                entry.level = level + 1
            end
        end
    end
end

-- Splits the list into runs of equal level, resolves weak, neutral and
-- implicit levels per run, then applies L1 (separators and trailing
-- whitespace fall back to baselevel) and L4 (mirroring).
local function resolve_levels(list,size,baselevel,analyze_fences)
    -- X10
    -- start and limit are both inclusive here. The loop used to follow a
    -- zero based scheme with an exclusive limit: a list of size one was never
    -- resolved and each run also contained the first entry of the next run,
    -- so that entry could get its level raised twice.
    local start = 1
    while start <= size do
        local level = list[start].level
        local limit = start
        while limit < size and list[limit+1].level == level do
            limit = limit + 1
        end
        local prev_level  = start == 1    and baselevel or list[start-1].level
        local next_level  = limit == size and baselevel or list[limit+1].level
        local orderbefore = (level > prev_level and level or prev_level) % 2 == 1 and "r" or "l"
        local orderafter  = (level > next_level and level or next_level) % 2 == 1 and "r" or "l"
        -- W1 .. W7
        resolve_weak(list,size,start,limit,orderbefore,orderafter)
        -- N0
        if analyze_fences then
            resolve_fences(list,size,start,limit)
        end
        -- N1 .. N2
        resolve_neutral(list,size,start,limit,orderbefore,orderafter)
        -- I1 .. I2
        resolve_implicit(list,size,start,limit,orderbefore,orderafter,baselevel)
        start = limit + 1
    end
    -- L1
    for i=1,size do
        local entry     = list[i]
        local direction = entry.original
        -- (1)
        if direction == "s" or direction == "b" then
            entry.level = baselevel
            -- (2)
            for j=i-1,1,-1 do
                local entry = list[j]
                if whitespace[entry.original] then
                    entry.level = baselevel
                else
                    break
                end
            end
        end
    end
    -- (3)
    for i=size,1,-1 do
        local entry = list[i]
        if whitespace[entry.original] then
            entry.level = baselevel
        else
            break
        end
    end
    -- L4
    if analyze_fences then
        for i=1,size do
            local entry = list[i]
            if entry.level % 2 == 1 then -- odd(entry.level)
                if entry.mirror and not entry.paired then
                    entry.mirror = false
                end
                -- okay
            elseif entry.mirror then
                entry.mirror = false
            end
        end
    else
        for i=1,size do
            local entry = list[i]
            if entry.level % 2 == 1 then -- odd(entry.level)
                local mirror = mirrordata[entry.char]
                if mirror then
                    entry.mirror = mirror
                end
            end
        end
    end
end

local index = 1

-- Runs the complete analysis over the string 'head' and returns the shared
-- list plus its size; 'direction' may be "TRT" or "TLT" to force the base
-- level, anything else lets the first strong character decide.
local function process(head,direction)
    local list, size = build_list(head)
    local baselevel = get_baselevel(list,size,direction) -- we always have an inline dir node in context
    resolve_explicit(list,size,baselevel)
    resolve_levels(list,size,baselevel,analyze_fences)
    index = 1
    return list, size
end

-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --
-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --

local utf = lexer.helpers.utfbytepattern

-- local t_start = token("default", utf, function(s,i) if i == 1 then index = 1 process(s) end end))
-- local t_bidi  = token("error", utf / function() index = index + 1 return list[index].direction == "r" end)
-- local t_rest  = token("default", any)

-- bidilexer._rules = {
--     { "start", t_start },
--     { "bidi",  t_bidi  },
--     { "rest",  t_rest  },
-- }

-- Analyzes the whole subject and builds a flat table holding a number and the
-- style name "error" for every entry that process reports.
-- NOTE(review): lpeg turns this function into a match-time capture, which is
-- expected to return a position or a boolean first; returning the table as the
-- only value looks wrong but depends on how the lexer framework consumes
-- _grammar -- confirm before changing it.
bidilexer._grammar = #utf * function(s,i)
    -- 'size' used to be read as an undefined global, which made the loop
    -- below fail; take it from what process returns
    local _, size = process(s)
    local t = { }
    local n = 0
    for position=1,size do
        n = n + 1 t[n] = position
        n = n + 1 t[n] = "error"
    end
    return t
end

bidilexer._tokenstyles = context.styleset

return bidilexer