From c6ebdfe2b6d339bb36c7a2559c98be3499c49ba9 Mon Sep 17 00:00:00 2001 From: Hans Hagen Date: Sat, 7 Sep 2013 13:50:00 +0200 Subject: beta 2013.09.07 13:50 --- tex/context/base/buff-ini.lua | 13 +- tex/context/base/char-ini.lua | 30 + tex/context/base/cont-new.mkiv | 2 +- tex/context/base/context-version.pdf | Bin 4106 -> 4111 bytes tex/context/base/context.mkiv | 2 +- tex/context/base/font-ctx.lua | 8 +- tex/context/base/font-hsh.lua | 51 +- tex/context/base/math-ini.mkiv | 51 +- tex/context/base/mult-def.mkiv | 2 + tex/context/base/node-rul.lua | 2 +- tex/context/base/spac-chr.mkiv | 7 +- tex/context/base/status-files.pdf | Bin 24736 -> 24770 bytes tex/context/base/status-lua.log | 2 +- tex/context/base/task-ini.lua | 2 + tex/context/base/typo-dir.lua | 664 +++++++++------ tex/context/base/typo-dir.mkiv | 63 +- tex/context/base/typo-uba.lua | 782 ++++++++++++++++++ tex/context/base/typo-ubb.lua | 887 +++++++++++++++++++++ tex/context/base/util-prs.lua | 12 +- tex/generic/context/luatex/luatex-fonts-merged.lua | 2 +- 20 files changed, 2310 insertions(+), 272 deletions(-) create mode 100644 tex/context/base/typo-uba.lua create mode 100644 tex/context/base/typo-ubb.lua (limited to 'tex') diff --git a/tex/context/base/buff-ini.lua b/tex/context/base/buff-ini.lua index 45288d18b..896584046 100644 --- a/tex/context/base/buff-ini.lua +++ b/tex/context/base/buff-ini.lua @@ -52,10 +52,10 @@ local function assign(name,str,catcodes) } end -local function append(name,str) +local function combine(name,str,prepend) local buffer = cache[name] if buffer then - buffer.data = buffer.data .. str + buffer.data = prepend and (str .. buffer.data) or (buffer.data .. 
str) buffer.typeset = false else cache[name] = { @@ -65,6 +65,14 @@ local function append(name,str) end end +local function prepend(name,str) + combine(name,str,true) +end + +local function append(name,str) + combine(name,str) +end + local function exists(name) return cache[name] end @@ -169,6 +177,7 @@ end buffers.raw = getcontent buffers.erase = erase buffers.assign = assign +buffers.prepend = prepend buffers.append = append buffers.exists = exists buffers.getcontent = getcontent diff --git a/tex/context/base/char-ini.lua b/tex/context/base/char-ini.lua index dc6e067c0..bb6730f7b 100644 --- a/tex/context/base/char-ini.lua +++ b/tex/context/base/char-ini.lua @@ -559,6 +559,36 @@ setmetatableindex(characters.directions,function(t,k) return v end) +characters.mirrors = { } + +setmetatableindex(characters.mirrors,function(t,k) + local d = data[k] + if d then + local v = d.mirror + if v then + t[k] = v + return v + end + end + t[k] = false + return v +end) + +characters.textclasses = { } + +setmetatableindex(characters.textclasses,function(t,k) + local d = data[k] + if d then + local v = d.textclass + if v then + t[k] = v + return v + end + end + t[k] = false + return v +end) + --[[ldx--

Next comes a whole series of helper methods. These are (will be) part of the official API.

diff --git a/tex/context/base/cont-new.mkiv b/tex/context/base/cont-new.mkiv index 9481969f5..33aefc217 100644 --- a/tex/context/base/cont-new.mkiv +++ b/tex/context/base/cont-new.mkiv @@ -11,7 +11,7 @@ %C therefore copyrighted by \PRAGMA. See mreadme.pdf for %C details. -\newcontextversion{2013.09.03 10:22} +\newcontextversion{2013.09.07 13:50} %D This file is loaded at runtime, thereby providing an excellent place for %D hacks, patches, extensions and new features. diff --git a/tex/context/base/context-version.pdf b/tex/context/base/context-version.pdf index d7a182570..07ae485d4 100644 Binary files a/tex/context/base/context-version.pdf and b/tex/context/base/context-version.pdf differ diff --git a/tex/context/base/context.mkiv b/tex/context/base/context.mkiv index a06f3dd51..af39cc2f0 100644 --- a/tex/context/base/context.mkiv +++ b/tex/context/base/context.mkiv @@ -25,7 +25,7 @@ %D up and the dependencies are more consistent. \edef\contextformat {\jobname} -\edef\contextversion{2013.09.03 10:22} +\edef\contextversion{2013.09.07 13:50} \edef\contextkind {beta} %D For those who want to use this: diff --git a/tex/context/base/font-ctx.lua b/tex/context/base/font-ctx.lua index 8b44d1c36..feefac338 100644 --- a/tex/context/base/font-ctx.lua +++ b/tex/context/base/font-ctx.lua @@ -73,6 +73,8 @@ local resources = hashes.resources local csnames = hashes.csnames local marks = hashes.markdata local lastmathids = hashes.lastmathids +local exheights = hashes.exheights +local emwidths = hashes.emwidths local designsizefilename = fontgoodies.designsizes.filename @@ -1542,11 +1544,11 @@ end local dimenfactors = number.dimenfactors -function helpers.dimenfactor(unit,tfmdata) -- could be a method of a font instance +function helpers.dimenfactor(unit,id) if unit == "ex" then - return (tfmdata and tfmdata.parameters.x_height) or 655360 + return id and exheights[id] or 282460 -- lm 10pt elseif unit == "em" then - return (tfmdata and tfmdata.parameters.em_width) or 655360 + return 
id and emwidths [id] or 655360 -- lm 10pt else local du = dimenfactors[unit] return du and 1/du or tonumber(unit) or 1 diff --git a/tex/context/base/font-hsh.lua b/tex/context/base/font-hsh.lua index f5c80d705..a18f78cab 100644 --- a/tex/context/base/font-hsh.lua +++ b/tex/context/base/font-hsh.lua @@ -6,6 +6,8 @@ if not modules then modules = { } end modules ['font-hsh'] = { license = "see context related readme files" } +local rawget = rawget + local setmetatableindex = table.setmetatableindex local currentfont = font.current local allocate = utilities.storage.allocate @@ -47,6 +49,9 @@ hashes.italics = italics hashes.lastmathids = lastmathids hashes.dynamics = dynamics +local nodepool = nodes.pool +local dummyglyph = nodepool.register(nodepool.glyph()) + local nulldata = allocate { name = "nullfont", characters = { }, @@ -133,17 +138,6 @@ setmetatableindex(resources, function(t,k) end end) -setmetatableindex(quads, function(t,k) - if k == true then - return quads[currentfont()] - else - local parameters = parameters[k] - local quad = parameters and parameters.quad or 0 - t[k] = quad - return quad - end -end) - local nospacing = { width = 0, stretch = 0, @@ -183,12 +177,43 @@ setmetatableindex(marks, function(t,k) end end) +setmetatableindex(quads, function(t,k) + if k == true then + return quads[currentfont()] + else + local parameters = rawget(parameters,k) + local quad + if parameters then + quad = parameters.quad + else + dummyglyph.font = k + dummyglyph.char = 0x2014 -- emdash + quad = dummyglyph.width -- dirty trick + end + if not quad or quad == 0 then + quad = 655360 -- lm 10pt + end + t[k] = quad + return quad + end +end) + setmetatableindex(xheights, function(t,k) if k == true then return xheights[currentfont()] else - local parameters = parameters[k] - local xheight = parameters and parameters.xheight or 0 + local parameters = rawget(parameters,k) + local xheight + if parameters then + xheight = parameters.xheight + else + dummyglyph.font = k + 
dummyglyph.char = 0x78 -- x + xheight = dummyglyph.height -- dirty trick + end + if not xheight or xheight == 0 then + xheight = 282460 -- lm 10pt + end t[k] = xheight return xheight end diff --git a/tex/context/base/math-ini.mkiv b/tex/context/base/math-ini.mkiv index 7b2766c45..e0afc3e93 100644 --- a/tex/context/base/math-ini.mkiv +++ b/tex/context/base/math-ini.mkiv @@ -61,6 +61,7 @@ \definesystemattribute[mathcategory] [public] \definesystemattribute[mathmode] [public] \definesystemattribute[mathitalics] [public] +\definesystemattribute[mathbidi] [public] \definesystemattribute[displaymath] [public] @@ -805,7 +806,34 @@ \setupmathematics [\c!compact=no] -%D Arabic: +% \enabletrackers[typesetters.directions.math] + +%D Right||to||left typesetting in math is supported by the \type {align} parameter +%D with as option the \type {bidi} parameter. Of course support for special symbols +%D like square roots depends on the font as well. We probably need to mirror a few +%D more characters. 
+%D +%D \startbuffer +%D \removeunwantedspaces +%D \m{ ( 1 = 1) }\quad +%D \m{ (123 = 123) }\quad +%D \m{ a ( 1 = 1) b }\quad +%D \m{ a (123 = 123) b }\quad +%D \m{ x = 123 y + (1 / \sqrt {x}) } +%D \stopbuffer +%D +%D \typebuffer +%D +%D \starttabulate[|T|T||] +%D \HL +%D \NC align \NC bidi \NC \NC \NR +%D \HL +%D \NC l2r \NC no \NC \setupmathematics[bidi=no] \getbuffer \NC \NR +%D \NC l2r \NC yes \NC \setupmathematics[bidi=yes] \getbuffer \NC \NR +%D \NC r2l \NC no \NC \setupmathematics[align=r2l,bidi=no] \getbuffer \NC \NR +%D \NC r2l \NC yes \NC \setupmathematics[align=r2l,bidi=yes] \getbuffer \NC \NR +%D \HL +%D \stoptabulate \newconditional\c_math_right_to_left @@ -818,9 +846,28 @@ \appendtoks \math_basics_synchronize_direction -%to \everymathematics % comes too late and I'm not in the mood for a mixed mode kludge now +%to \everymathematics % comes too late and I'm not in the mood for a mixed mode kludge now (should be a property of beginmath nodes and passed to callbacks) \to \everyswitchmathematics +% experimental (needed for an article) + +\installcorenamespace {mathbidi} + +\newcount\c_math_bidi + +\setvalue{\??mathbidi\v!no }{\ctxcommand{setmathdirection(0)}\c_math_bidi\attributeunsetvalue} +\setvalue{\??mathbidi\v!yes}{\ctxcommand{setmathdirection(1)}\c_math_bidi\plusone} + +\appendtoks + \edef\p_bidi{\mathematicsparameter\c!bidi}% + \csname\??mathbidi\ifcsname\??mathbidi\p_bidi\endcsname\p_bidi\else\v!no\fi\endcsname +\to \everysetupmathematics + +\appendtoks + \attribute\mathbidiattribute\ifconditional\c_math_right_to_left\c_math_bidi\else\attributeunsetvalue\fi +\to \everyswitchmathematics + + %D Delayed: greek. 
%D %D \starttyping diff --git a/tex/context/base/mult-def.mkiv b/tex/context/base/mult-def.mkiv index 6fff33f6a..21ed8cb87 100644 --- a/tex/context/base/mult-def.mkiv +++ b/tex/context/base/mult-def.mkiv @@ -34,6 +34,8 @@ % start todo: +\def\c!fences {fences} + \def\c!language {language} \def\c!compressseparator{compressseparator} \def\c!renderingsetup {renderingsetup} diff --git a/tex/context/base/node-rul.lua b/tex/context/base/node-rul.lua index 5c64c0113..6fd0ad068 100644 --- a/tex/context/base/node-rul.lua +++ b/tex/context/base/node-rul.lua @@ -273,7 +273,7 @@ local function flush_ruled(head,f,l,d,level,parent,strip) -- not that fast but a local transparency = ta > 0 and ta or f[a_transparency] local foreground = order == v_foreground - local e = dimenfactor(unit,fontdata[f.font]) -- what if no glyph node + local e = dimenfactor(unit,f.font) -- what if no glyph node local rt = tonumber(rulethickness) if rt then diff --git a/tex/context/base/spac-chr.mkiv b/tex/context/base/spac-chr.mkiv index 0b6ebe0a9..54a25be34 100644 --- a/tex/context/base/spac-chr.mkiv +++ b/tex/context/base/spac-chr.mkiv @@ -68,13 +68,16 @@ \edef\breakablethinspace {\normalUchar"2009} % quad/8 \edef\hairspace {\normalUchar"200A} % quad/8 \edef\zerowidthspace {\normalUchar"200B} % 0 -\edef\zwnj {\normalUchar"200C} % 0 -\edef\zwj {\normalUchar"200D} % 0 +\edef\zerowidthnonjoiner {\normalUchar"200C} % 0 +\edef\zerowidthjoiner {\normalUchar"200D} % 0 \edef\narrownobreakspace {\normalUchar"202F} % quad/8 % % "205F % space/8 (math) % \zerowidthnobreakspace {\normalUchar"FEFF} \udef\zerowidthnobreakspace {\penalty\plustenthousand\kern\zeropoint} +\let\zwnj\zerowidthnonjoiner +\let\zwj \zerowidthjoiner + % Shortcuts: % unexpanded as otherwise we need to intercept / cleanup a lot diff --git a/tex/context/base/status-files.pdf b/tex/context/base/status-files.pdf index 966752d3f..2933fd5b3 100644 Binary files a/tex/context/base/status-files.pdf and b/tex/context/base/status-files.pdf differ diff 
--git a/tex/context/base/status-lua.log b/tex/context/base/status-lua.log index afdc2548f..9279aef57 100644 --- a/tex/context/base/status-lua.log +++ b/tex/context/base/status-lua.log @@ -1,6 +1,6 @@ (cont-yes.mkiv -ConTeXt ver: 2013.09.03 10:22 MKIV beta fmt: 2013.9.3 int: english/english +ConTeXt ver: 2013.09.07 13:50 MKIV beta fmt: 2013.9.7 int: english/english system > 'cont-new.mkiv' loaded (cont-new.mkiv) diff --git a/tex/context/base/task-ini.lua b/tex/context/base/task-ini.lua index 5ee7e8994..4390a4521 100644 --- a/tex/context/base/task-ini.lua +++ b/tex/context/base/task-ini.lua @@ -92,6 +92,7 @@ appendaction("math", "normalizers", "noads.handlers.classes", nil, "noh appendaction("math", "builders", "builders.kernel.mlist_to_hlist") -- always on ------------("math", "builders", "noads.handlers.italics", nil, "nohead") -- disabled +appendaction("math", "builders", "typesetters.directions.processmath") -- disabled (has to happen pretty late) -- quite experimental (nodes.handlers.graphicvadjust might go away) @@ -167,6 +168,7 @@ disableaction("math", "noads.handlers.showtree") disableaction("math", "noads.handlers.tags") disableaction("math", "noads.handlers.italics") disableaction("math", "noads.handlers.classes") +disableaction("math", "typesetters.directions.processmath") disableaction("mvlbuilders", "typesetters.checkers.handler") disableaction("vboxbuilders","typesetters.checkers.handler") diff --git a/tex/context/base/typo-dir.lua b/tex/context/base/typo-dir.lua index 16de85dd1..939850e81 100644 --- a/tex/context/base/typo-dir.lua +++ b/tex/context/base/typo-dir.lua @@ -6,28 +6,86 @@ if not modules then modules = { } end modules ['typo-dir'] = { license = "see context related readme files" } --- todo: also use end_of_math here? --- todo: use lpeg instead of match +-- When we started with this, there were some issues in luatex so we needed to take care of +-- intereferences. 
Some has been improved but we still might end up with each node having a +-- dir property. Now, the biggest problem is that there is an official bidi algorithm but +-- some searching on the web shows that there are many confusing aspects and therefore +-- proposals circulate about (sometimes incompatible ?) improvements. In the end it all boils +-- down to the lack of willingness to tag an input source. Of course tagging of each number +-- and fenced strip is somewhat over the top, but now it has to be captured in logic. Texies +-- normally have no problem with tagging but we need to handle any input. So, what we have +-- done here (over the years) is starting from what we expect to see happen, especially with +-- respect to punctuation, numbers and fences. Eventually alternative algorithms will be provided +-- so that users can choose (the reason why suggestions for improvements circulate on the web +-- is that it is non trivial to predict the expected behaviour so one hopes that the editor +-- and the rest of the machinery match somehow). Anyway, the fun of tex is that it has no hard +-- coded behavior. And ... we also want to have more debugging and extras and ... so we want +-- a flexible approach. In the end we will have: +-- +-- = full tagging (mechanism turned off) +-- = half tagging (the current implementation) +-- = unicode version x interpretation (several depending on the evolution) + +-- Some analysis by Idris: +-- +-- 1. Assuming the reading- vs word-order distinction (bidi-char types) is governing; +-- 2. Assuming that 'ARAB' represents an actual arabic string in raw input order, not word-order; +-- 3.
Assuming that 'BARA' represent the correct RL word order; +-- +-- Then we have, with input: LATIN ARAB +-- +-- \textdir TLT LATIN ARAB => LATIN BARA +-- \textdir TRT LATIN ARAB => LATIN BARA +-- \textdir TRT LRO LATIN ARAB => LATIN ARAB +-- \textdir TLT LRO LATIN ARAB => LATIN ARAB +-- \textdir TLT RLO LATIN ARAB => NITAL ARAB +-- \textdir TRT RLO LATIN ARAB => NITAL ARAB + +-- elseif d == "es" then -- European Number Separator +-- elseif d == "et" then -- European Number Terminator +-- elseif d == "cs" then -- Common Number Separator +-- elseif d == "nsm" then -- Non-Spacing Mark +-- elseif d == "bn" then -- Boundary Neutral +-- elseif d == "b" then -- Paragraph Separator +-- elseif d == "s" then -- Segment Separator +-- elseif d == "ws" then -- Whitespace +-- elseif d == "on" then -- Other Neutrals + +-- todo : delayed inserts here +-- todo : get rid of local functions here +-- beware: math adds whatsits afterwards so that will mess things up +-- todo : use new dir functions +-- todo : make faster +-- todo : also use end_of_math here? +-- todo : use lpeg instead of match +-- todo : move dir info into nodes +-- todo : swappable tables and floats i.e. 
start-end overloads (probably loop in builders) +-- todo : check if we still have crashes in luatex when non-matched (used to be the case) +-- todo : look into the (new) unicode logic (non intuitive stuff) local next, type = next, type local format, insert, sub, find, match = string.format, table.insert, string.sub, string.find, string.match local utfchar = utf.char local formatters = string.formatters --- vertical space handler - local nodes, node = nodes, node -local trace_directions = false trackers.register("typesetters.directions", function(v) trace_directions = v end) +local trace_textdirections = false trackers.register("typesetters.directions.text", function(v) trace_textdirections = v end) +local trace_mathdirections = false trackers.register("typesetters.directions.math", function(v) trace_mathdirections = v end) +local trace_directions = false trackers.register("typesetters.directions", function(v) trace_textdirections = v trace_mathdirections = v end) + +local report_textdirections = logs.reporter("typesetting","text directions") +local report_mathdirections = logs.reporter("typesetting","math directions") -local report_directions = logs.reporter("typesetting","directions") local traverse_id = node.traverse_id local insert_node_before = node.insert_before local insert_node_after = node.insert_after local remove_node = nodes.remove +local end_of_math = nodes.end_of_math local texsetattribute = tex.setattribute +local texsetcount = tex.setcount local unsetvalue = attributes.unsetvalue local nodecodes = nodes.nodecodes @@ -42,6 +100,8 @@ local math_code = nodecodes.math local penalty_code = nodecodes.penalty local kern_code = nodecodes.kern local glue_code = nodecodes.glue +local hlist_code = nodecodes.hlist +local vlist_code = nodecodes.vlist local localpar_code = whatcodes.localpar local dir_code = whatcodes.dir @@ -50,191 +110,215 @@ local nodepool = nodes.pool local new_textdir = nodepool.textdir -local beginmath_code = mathcodes.beginmath -local 
endmath_code = mathcodes.endmath - local fonthashes = fonts.hashes local fontdata = fonthashes.identifiers local fontchar = fonthashes.characters -local chardata = characters.data -local chardirs = characters.directions -- maybe make a special mirror table - ---~ Analysis by Idris: ---~ ---~ 1. Assuming the reading- vs word-order distinction (bidi-char types) is governing; ---~ 2. Assuming that 'ARAB' represents an actual arabic string in raw input order, not word-order; ---~ 3. Assuming that 'BARA' represent the correct RL word order; ---~ ---~ Then we have, with input: LATIN ARAB ---~ ---~ \textdir TLT LATIN ARAB => LATIN BARA ---~ \textdir TRT LATIN ARAB => LATIN BARA ---~ \textdir TRT LRO LATIN ARAB => LATIN ARAB ---~ \textdir TLT LRO LATIN ARAB => LATIN ARAB ---~ \textdir TLT RLO LATIN ARAB => NITAL ARAB ---~ \textdir TRT RLO LATIN ARAB => NITAL ARAB - --- elseif d == "es" then -- European Number Separator --- elseif d == "et" then -- European Number Terminator --- elseif d == "cs" then -- Common Number Separator --- elseif d == "nsm" then -- Non-Spacing Mark --- elseif d == "bn" then -- Boundary Neutral --- elseif d == "b" then -- Paragraph Separator --- elseif d == "s" then -- Segment Separator --- elseif d == "ws" then -- Whitespace --- elseif d == "on" then -- Other Neutrals - -typesetters.directions = typesetters.directions or { } -local directions = typesetters.directions - -local a_state = attributes.private('state') -local a_directions = attributes.private('directions') - -local skipmath = true -local strip = false - --- todo: delayed inserts here --- todo: get rid of local functions here - --- beware, math adds whatsits afterwards so that will mess things up - -local finish, autodir, embedded, override, done = nil, 0, 0, 0, false -local list, glyphs = nil, false -local finished, finidir, finipos = nil, nil, 1 -local head, current, inserted = nil, nil, nil - -local function finish_auto_before() - local fdir = "-" .. 
finish - head, inserted = insert_node_before(head,current,new_textdir(fdir)) - finished, finidir, autodir = inserted, finish, 0 - if trace_directions then - insert(list,#list,formatters["auto %a inserted before, autodir %a, embedded %a"](fdir,autodir,embedded)) - finipos = #list - 1 - end - finish, done = nil, true -end +local chardirections = characters.directions +local charmirrors = characters.mirrors +local charclasses = characters.textclasses + +local directions = typesetters.directions or { } +typesetters.directions = directions + +local a_state = attributes.private('state') +local a_directions = attributes.private('directions') +local a_mathbidi = attributes.private('mathbidi') + +local strip = false + +local s_isol = fonts.analyzers.states.isol -local function finish_auto_after() - local fdir = "-" .. finish - head, current = insert_node_after(head,current,new_textdir(fdir)) - finished, finidir, autodir = current, finish, 0 - if trace_directions then - list[#list+1] = formatters["auto %a inserted after, autodir %a, embedded %a"](fdir,autodir,embedded) - finipos = #list +local variables = interfaces.variables +local v_global = variables["global"] +local v_local = variables["local"] +local v_on = variables.on +local v_yes = variables.yes + +local m_enabled = 2^6 -- 64 +local m_global = 2^7 +local m_fences = 2^8 + +local handlers = { } +local methods = { } +local lastmethod = 0 + +local function installhandler(name,handler) + local method = methods[name] + if not method then + lastmethod = lastmethod + 1 + method = lastmethod + methods[name] = method end - finish, done = nil, true + handlers[method] = handler + return method end -local function force_auto_left_before(d) - if finish then - finish_auto_before() - end - if embedded >= 0 then - finish, autodir, done = "TLT", 1, true +directions.handlers = handlers +directions.installhandler = installhandler + +local function tomode(specification) + local scope = specification.scope + local mode + if scope == 
v_global or scope == v_on then + mode = m_enabled + m_global + elseif scope == v_local then + mode = m_enabled else - finish, autodir, done = "TRT", -1, true + return 0 end - if finidir == finish then - head = remove_node(head,finished,true) - if trace_directions then - list[finipos] = list[finipos] .. ", deleted afterwards" - insert(list,#list,formatters["start text dir %a, auto left before, embedded %a, autodir %a, triggered by class %a"](finish,embedded,autodir,d)) - end + local method = methods[specification.method] + if method then + mode = mode + method else - head, inserted = insert_node_before(head,current,new_textdir("+"..finish)) - if trace_directions then - insert(list,#list,formatters["start text dir %a, auto left before, embedded %a, autodir %a, triggered by class %a"](finish,embedded,autodir,d)) - end + return 0 + end + if specification.fences == v_yes then + mode = mode + m_fences end + return mode end -local function force_auto_right_before(d) - if finish then - finish_auto_before() +local function getglobal(a) + return a and a > 0 and hasbit(a,m_global) +end + +local function getfences(a) + return a and a > 0 and hasbit(a,m_fences) +end + +local function getmethod(a) + return a and a > 0 and a % m_enabled or 0 +end + +directions.tomode = tomode +directions.getscope = getscope +directions.getfences = getfences +directions.getmethod = getmethod +directions.installhandler = installhandler + +function commands.getbidimode(specification) + context(tomode(specification)) -- hash at tex end +end + +local function process_direct(namespace,attribute,start) + + local head = start + + local current, inserted = head, nil + local finish, autodir, embedded, override, done = nil, 0, 0, 0, false + local list, glyphs = trace_textdirections and { }, false + local finished, finidir, finipos = nil, nil, 1 + local stack, top, obsolete = { }, 0, { } + local lro, rlo, prevattr = false, false, 0 + + local function finish_auto_before() + local fdir = finish == "TRT" and 
"-TRT" or "-TLT" + head, inserted = insert_node_before(head,current,new_textdir(fdir)) + finished, finidir, autodir = inserted, finish, 0 + if trace_textdirections then + insert(list,#list,formatters["auto %a inserted before, autodir %a, embedded %a"](fdir,autodir,embedded)) + finipos = #list - 1 + end + finish, done = nil, true end - if embedded <= 0 then - finish, autodir, done = "TRT", -1, true - else - finish, autodir, done = "TLT", 1, true + + local function finish_auto_after() + local fdir = finish == "TRT" and "-TRT" or "-TLT" + head, current = insert_node_after(head,current,new_textdir(fdir)) + finished, finidir, autodir = current, finish, 0 + if trace_textdirections then + list[#list+1] = formatters["auto %a inserted after, autodir %a, embedded %a"](fdir,autodir,embedded) + finipos = #list + end + finish, done = nil, true end - if finidir == finish then - head = remove_node(head,finished,true) - if trace_directions then - list[finipos] = list[finipos] .. ", deleted afterwards" - insert(list,#list,formatters["start text dir %a, auto right before, embedded %a, autodir %a, triggered by class %a"](finish,embedded,autodir,d)) + + local function force_auto_left_before(d) + if finish then + finish_auto_before() end - else - head, inserted = insert_node_before(head,current,new_textdir("+"..finish)) - if trace_directions then - insert(list,#list,formatters["start text dir %a, auto right before, embedded %a, autodir %a, triggered by class %a"](finish,embedded,autodir,d)) + if embedded >= 0 then + finish, autodir = "TLT", 1 + else + finish, autodir = "TRT", -1 + end + done = true + if finidir == finish then + head = remove_node(head,finished,true) + if trace_textdirections then + list[finipos] = list[finipos] .. 
", deleted afterwards" + insert(list,#list,formatters["start text dir %a, auto left before, embedded %a, autodir %a, triggered by class %a"](finish,embedded,autodir,d)) + end + else + head, inserted = insert_node_before(head,current,new_textdir("+"..finish)) + if trace_textdirections then + insert(list,#list,formatters["start text dir %a, auto left before, embedded %a, autodir %a, triggered by class %a"](finish,embedded,autodir,d)) + end end end -end -local function nextisright(current) - repeat - current = current.next - local id = current.id - if id == glyph_code then - local char = current.char - local d = chardirs[char] - return d == "r" or d == "al" or d == "an" and current --- elseif id == glue_code or id == kern_code or id == penalty_code then --- -- too complex + local function force_auto_right_before(d) + if finish then + finish_auto_before() + end + if embedded <= 0 then + finish, autodir, done = "TRT", -1 else - return + finish, autodir, done = "TLT", 1 end - until not current -end - -local function previsright(current) - repeat - current = current.prev - local id = current.id - if id == glyph_code then - local char = current.char - local d = chardirs[char] - return d == "r" or d == "al" or d == "an" --- elseif id == glue_code or id == kern_code or id == penalty_code then --- -- too complex + done = true + if finidir == finish then + head = remove_node(head,finished,true) + if trace_textdirections then + list[finipos] = list[finipos] .. 
", deleted afterwards" + insert(list,#list,formatters["start text dir %a, auto right before, embedded %a, autodir %a, triggered by class %a"](finish,embedded,autodir,d)) + end else - return + head, inserted = insert_node_before(head,current,new_textdir("+"..finish)) + if trace_textdirections then + insert(list,#list,formatters["start text dir %a, auto right before, embedded %a, autodir %a, triggered by class %a"](finish,embedded,autodir,d)) + end end - until not current -end - --- todo: use new dir functions - --- todo: use end_of_math + end -local s_isol = fonts.analyzers.states.isol + local function nextisright(current) + -- repeat + current = current.next + local id = current.id + if id == glyph_code then + local char = current.char + local d = chardirections[char] + return d == "r" or d == "al" or d == "an" + -- elseif id == glue_code or id == kern_code or id == penalty_code then + -- -- too complex + -- else + -- return + end + -- until not current + end -function directions.process(namespace,attribute,start) -- todo: make faster - if not start.next then - return start, false + local function previsright(current) + -- repeat + current = current.prev + local id = current.id + if id == glyph_code then + local char = current.char + local d = chardirections[char] + return d == "r" or d == "al" or d == "an" + -- elseif id == glue_code or id == kern_code or id == penalty_code then + -- -- too complex + -- else + -- return + end + -- until not current end - head, current, inserted = start, start, nil - finish, autodir, embedded, override, done = nil, 0, 0, 0, false - list, glyphs = trace_directions and { }, false - finished, finidir, finipos = nil, nil, 1 - local stack, top, obsolete = { }, 0, { } - local lro, rlo, prevattr, inmath = false, false, 0, false + while current do local id = current.id --- list[#list+1] = formatters["state: node %a, finish %a, autodir %a, embedded %a"](nutstring(current),finish or "unset",autodir,embedded) - if skipmath and id == math_code 
then - local subtype = current.subtype - if subtype == beginmath_code then - inmath = true - elseif subtype == endmath_code then - inmath = false - else - -- todo - end - current = current.next - elseif inmath then - current = current.next + -- list[#list+1] = formatters["state: node %a, finish %a, autodir %a, embedded %a"](nutstring(current),finish or "unset",autodir,embedded) + if id == math_code then + current = end_of_math(current.next).next else local attr = current[attribute] if attr and attr > 0 then @@ -244,17 +328,17 @@ function directions.process(namespace,attribute,start) -- todo: make faster elseif attr ~= prevattr then -- no pop, grouped driven (2=normal,3=lro,4=rlo) if attr == 3 then - if trace_directions then + if trace_textdirections then list[#list+1] = formatters["override right -> left (lro), bidi %a"](attr) end lro, rlo = true, false elseif attr == 4 then - if trace_directions then + if trace_textdirections then list[#list+1] = formatters["override left -> right (rlo), bidi %a"](attr) end lro, rlo = false, true else - if trace_directions and + if trace_textdirections and current ~= head then list[#list+1] = formatters["override reset, bidi %a"](attr) end lro, rlo = false, false @@ -266,14 +350,14 @@ function directions.process(namespace,attribute,start) -- todo: make faster glyphs = true if attr and attr > 0 then local char = current.char - local d = chardirs[char] + local d = chardirections[char] if rlo or override > 0 then if d == "l" then - if trace_directions then + if trace_textdirections then list[#list+1] = formatters["char %C of class %a overridden to r, bidi %a)"](char,d,attr) end d = "r" - elseif trace_directions then + elseif trace_textdirections then if d == "lro" or d == "rlo" or d == "pdf" then -- else side effects on terminal list[#list+1] = formatters["override char of class %a, bidi %a"](d,attr) else -- todo: rle lre @@ -283,18 +367,18 @@ function directions.process(namespace,attribute,start) -- todo: make faster elseif lro or 
override < 0 then if d == "r" or d == "al" then current[a_state] = s_isol -- maybe better have a special bidi attr value -> override (9) -> todo - if trace_directions then + if trace_textdirections then list[#list+1] = formatters["char %C of class %a overridden to l, bidi %a, state 'isol'"](char,d,attr) end d = "l" - elseif trace_directions then + elseif trace_textdirections then if d == "lro" or d == "rlo" or d == "pdf" then -- else side effects on terminal list[#list+1] = formatters["override char of class %a, bidi %a"](d,attr) else -- todo: rle lre list[#list+1] = formatters["char %C of class %a, bidi %a"](char,d,attr) end end - elseif trace_directions then + elseif trace_textdirections then if d == "lro" or d == "rlo" or d == "pdf" then -- else side effects on terminal list[#list+1] = formatters["override char of class %a, bidi %a"](d,attr) else -- todo: rle lre @@ -302,11 +386,10 @@ function directions.process(namespace,attribute,start) -- todo: make faster end end if d == "on" then - local cdata = chardata[char] - local mirror = cdata.mirror -- maybe make a special mirror table + local mirror = charmirrors[char] if mirror and fontchar[current.font][mirror] then -- todo: set attribute - local class = cdata.textclass + local class = charclasses[char] if class == "open" then if nextisright(current) then if autodir >= 0 then @@ -333,7 +416,7 @@ function directions.process(namespace,attribute,start) -- todo: make faster else mirror = nil end - if trace_directions then + if trace_textdirections then if mirror then list[#list+1] = formatters["mirroring char %C of class %a to %C, autodir %a, bidi %a"](char,d,mirror,autodir,attr) else @@ -351,14 +434,14 @@ function directions.process(namespace,attribute,start) -- todo: make faster end elseif d == "an" then -- arabic number -- actually this is language dependent ... 
--- if autodir <= 0 then --- force_auto_left_before(d) --- end + -- if autodir <= 0 then + -- force_auto_left_before(d) + -- end if autodir >= 0 then force_auto_right_before(d) end elseif d == "lro" then -- Left-to-Right Override -> right becomes left - if trace_directions then + if trace_textdirections then list[#list+1] = "override right -> left" end top = top + 1 @@ -366,7 +449,7 @@ function directions.process(namespace,attribute,start) -- todo: make faster override = -1 obsolete[#obsolete+1] = current elseif d == "rlo" then -- Right-to-Left Override -> left becomes right - if trace_directions then + if trace_textdirections then list[#list+1] = "override left -> right" end top = top + 1 @@ -374,7 +457,7 @@ function directions.process(namespace,attribute,start) -- todo: make faster override = 1 obsolete[#obsolete+1] = current elseif d == "lre" then -- Left-to-Right Embedding -> TLT - if trace_directions then + if trace_textdirections then list[#list+1] = "embedding left -> right" end top = top + 1 @@ -382,7 +465,7 @@ function directions.process(namespace,attribute,start) -- todo: make faster embedded = 1 obsolete[#obsolete+1] = current elseif d == "rle" then -- Right-to-Left Embedding -> TRT - if trace_directions then + if trace_textdirections then list[#list+1] = "embedding right -> left" end top = top + 1 @@ -390,69 +473,62 @@ function directions.process(namespace,attribute,start) -- todo: make faster embedded = -1 -- was 1 obsolete[#obsolete+1] = current elseif d == "pdf" then -- Pop Directional Format - -- override = 0 + -- override = 0 if top > 0 then local s = stack[top] override, embedded = s[1], s[2] top = top - 1 - if trace_directions then + if trace_textdirections then list[#list+1] = formatters["state: override %a, embedded %a, autodir %a"](override,embedded,autodir) end else - if trace_directions then + if trace_textdirections then list[#list+1] = "pop error: too many pops" end end obsolete[#obsolete+1] = current end - elseif trace_directions then + 
elseif trace_textdirections then local char = current.char - local d = chardirs[char] + local d = chardirections[char] list[#list+1] = formatters["char %C of class %a, bidi %a"](char,d or "?") end elseif id == whatsit_code then + -- we have less directions now so we can do hard checks for strings instead of splitting into pieces if finish then finish_auto_before() end local subtype = current.subtype if subtype == localpar_code then --- if false then - local dir = current.dir - local d = sub(dir,2,2) - if d == 'R' then -- find(dir,".R.") / dir == "TRT" - autodir = -1 - else - autodir = 1 - end - -- embedded = autodir - if trace_directions then - list[#list+1] = formatters["pardir %a"](dir) - end --- end - elseif subtype == dir_code then - local dir = current.dir - -- local sign = sub(dir,1,1) - -- local dire = sub(dir,3,3) - local sign, dire = match(dir,"^(.).(.)") -- splitter - if dire == "R" then - if sign == "+" then - finish, autodir = "TRT", -1 - else - finish, autodir = nil, 0 + -- if false then + local dir = current.dir + if dir == 'TRT' then + autodir = -1 + elseif dir == 'TLT' then + autodir = 1 end - else - if sign == "+" then - finish, autodir = "TLT", 1 - else - finish, autodir = nil, 0 + -- embedded = autodir + if trace_textdirections then + list[#list+1] = formatters["pardir %a"](dir) end + -- end + elseif subtype == dir_code then + local dir = current.dir + if dir == "+TRT" then + finish, autodir = "TRT", -1 + elseif dir == "-TRT" then + finish, autodir = nil, 0 + elseif dir == "+TLT" then + finish, autodir = "TLT", 1 + elseif dir == "-TLT" then + finish, autodir = nil, 0 end - if trace_directions then + if trace_textdirections then list[#list+1] = formatters["textdir %a, autodir %a"](dir,autodir) end end else - if trace_directions then + if trace_textdirections then list[#list+1] = formatters["node %a, subtype %a"](nodecodes[id],current.subtype) end if finish then @@ -468,23 +544,45 @@ function directions.process(namespace,attribute,start) -- todo: 
make faster current = cn end end - if trace_directions and glyphs then - report_directions("start log") + + if trace_textdirections and glyphs then + report_textdirections("start log") for i=1,#list do - report_directions("%02i: %s",i,list[i]) + report_textdirections("%02i: %s",i,list[i]) end - report_directions("stop log") + report_textdirections("stop log") end + if done and strip then local n = #obsolete if n > 0 then for i=1,n do remove_node(head,obsolete[i],true) end - report_directions("%s character nodes removed",n) + report_textdirections("%s character nodes removed",n) end end + return head, done + +end + +installhandler(variables.default,process_direct) + +function directions.process(namespace,attribute,head) -- nodes not nuts + if not head.next then + return head, false + end + local attr = head[a_directions] + if not attr or attr == 0 then + return head, false + end + local method = getmethod(attr) + local handler = handlers[method] + if not handler then + return head, false + end + return handler(namespace,attribute,head) end -- function directions.enable() @@ -495,8 +593,8 @@ local enabled = false function directions.set(n) -- todo: names and numbers if not enabled then - if trace_directions then - report_breakpoints("enabling directions handler") + if trace_textdirections then + report_textdirections("enabling directions handler") end tasks.enableaction("processors","typesetters.directions.handler") enabled = true @@ -511,7 +609,123 @@ end commands.setdirection = directions.set directions.handler = nodes.installattributehandler { - name = "directions", + name = "directions", namespace = directions, processor = directions.process, } + +-- As I'm wrapping up the updated math support (for CTX/TUG 2013) I wondered about numbers in +-- r2l math mode. Googling lead me to TUGboat, Volume 25 (2004), No. 2 where I see numbers +-- running from left to right. Makes me wonder how far we should go. 
And as I was looking +-- into bidi anyway, it's a nice distraction. +-- +-- I first tried to hook something into noads but that gets pretty messy due to indirectness +-- char noads. If needed, I'll do it that way. With regards to spacing: as we can assume that +-- only numbers are involved we can safely swap them and the same is true for mirroring. But +-- anyway, I'm not too happy with this solution so eventually I'll do something with noads (as +-- an alternative method). + +local function processmath(head) + local current = head + local done = false + local start = nil + local stop = nil + local function capsulate() + head = insert_node_before(head,start,new_textdir("+TLT")) + insert_node_after(head,stop,new_textdir("-TLT")) + if trace_mathdirections then + report_mathdirections("reversed: %s",nodes.listtoutf(start,false,false,stop)) + end + done = true + start = false + stop = nil + end + while current do + local id = current.id + if id == glyph_code then + local char = current.char + local cdir = chardirections[char] + if cdir == "en" or cdir == "an" then -- we could check for mathclass punctuation + if not start then + start = current + end + stop = current + else + if not start then + -- nothing + elseif start == stop then + start = nil + else + capsulate() + end + if cdir == "on" then + local mirror = charmirrors[char] + if mirror then + local class = charclasses[char] + if class == "open" or class == "close" then + current.char = mirror + if trace_mathdirections then + report_mathdirections("mirrored: %C to %C",char,mirror) + end + done = true + end + end + end + end + elseif not start then + -- nothing + elseif start == stop then + start = nil + else + capsulate(head,start,stop) + -- math can pack things into hlists .. 
we need to make sure we don't process + -- too often: needs checking + if id == hlist_code or id == vlist_code then + local list, d = processmath(current.list) + current.list = list + if d then + done = true + end + end + end + current = current.next + end + if not start then + -- nothing + elseif start == stop then + -- nothing + else + capsulate() + end + return head, done +end + +local enabled = false + +function directions.processmath(head) -- style, penalties + if enabled then + local a = head[a_mathbidi] + if a and a > 0 then + return processmath(head) + end + end + return head, false +end + +function directions.setmath(n) + if not enabled and n and n > 0 then + if trace_mathdirections then + report_mathdirections("enabling directions handler") + end + nodes.tasks.enableaction("math","typesetters.directions.processmath") + enabled = true + end +end + +commands.setmathdirection = directions.setmath + +-- directions.mathhandler = nodes.installattributehandler { +-- name = "directions", +-- namespace = directions, +-- processor = directions.processmath, +-- } diff --git a/tex/context/base/typo-dir.mkiv b/tex/context/base/typo-dir.mkiv index d35dfeb66..4dee24c53 100644 --- a/tex/context/base/typo-dir.mkiv +++ b/tex/context/base/typo-dir.mkiv @@ -21,11 +21,16 @@ \installcorenamespace{directions} \installcorenamespace{directionsbidimode} +\installcorenamespace{directionsbidimethod} +\installcorenamespace{directionsbidifences} % plural as we can have a combination but maybe better singular \installsimplecommandhandler \??directions {directions} \??directions % no \define... 
yet +\edef\lefttorightmark{\normalUchar"200E} \let\lrm\lefttorightmark +\edef\righttoleftmark{\normalUchar"200F} \let\rlm\righttoleftmark + \unexpanded\def\setdirection[#1]% todo: symbolic names {\ctxcommand{setdirection(\number#1)}} @@ -35,34 +40,54 @@ \newconstant\directionsbidimode % this one might become pivate -\letvalue{\??directionsbidimode\v!off }\zerocount -\letvalue{\??directionsbidimode\v!global}\plusone -\letvalue{\??directionsbidimode\v!local }\plustwo -\letvalue{\??directionsbidimode\v!on }\plustwo +% local modes = { +% method = 1024, -- not used +% global = 2048, +% fences = 4096, +% } + +% a = 0 +% a = a + modes.fences +% a = a + modes.global +% a = a + 2 -- method 2 + +% print(a) + +% print(number.hasbit(a,modes.fences)) +% print(number.hasbit(a,modes.global)) +% print(a % modes.method) + +\def\typo_dir_get_mode + {\def\currentbidimode{\ctxcommand{getbidimode { + scope = "\directionsparameter\c!bidi ", + method = "\directionsparameter\c!method", + fences = "\directionsparameter\c!fences", + }}}% + \expandafter\glet\csname\??directionsbidimode\currentbidistamp\endcsname\currentbidimode} \appendtoks - \directionsbidimode - \ifcsname\??directionsbidimode\directionsparameter\c!bidi\endcsname - \csname\??directionsbidimode\directionsparameter\c!bidi\endcsname - \else - \zerocount - \fi - \relax + \edef\currentbidistamp + {\directionsparameter\c!bidi + :\directionsparameter\c!method + :\directionsparameter\c!fences}% + \expandafter\let\expandafter\currentbidimode\csname\??directionsbidimode\currentbidistamp\endcsname + \ifx\currentbidimode\relax + \typo_dir_get_mode + \fi + \directionsbidimode\currentbidimode\relax \ifcase\directionsbidimode - \resetdirection - \or % 1 - \setdirection[1]% global, chars - \or % 2 - \setdirection[2]% local, attributes - \or % else - \setdirection[1]% default + \resetdirection + \else + \setdirection[\number\directionsbidimode]% \fi \to \everysetupdirections % bidi: local=obey grouping, global=ignore grouping (unicode has 
no grouping) \setupdirections % maybe start/stop - [\c!bidi=\v!off] + [\c!bidi=\v!off, + \c!method=\v!default, + \c!fences=\v!yes] \unexpanded\edef\bidilre{\normalUchar"202A} % maybe \edef's \unexpanded\edef\bidirle{\normalUchar"202B} diff --git a/tex/context/base/typo-uba.lua b/tex/context/base/typo-uba.lua new file mode 100644 index 000000000..3cfb7994b --- /dev/null +++ b/tex/context/base/typo-uba.lua @@ -0,0 +1,782 @@ +if not modules then modules = { } end modules ['typo-uba'] = { + version = 1.001, + comment = "companion to typo-dir.mkiv", + author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", + copyright = "PRAGMA ADE / ConTeXt Development Team / See below", + license = "see context related readme files / whatever applies", + comment = "Unicode bidi (sort of) variant a", + derived = "derived from t-bidi by Khaled Hosny who derived from minibidi.c by Arabeyes", +} + +-- Comment by Khaled Hosny: +-- +-- This code started as a line for line translation of Arabeyes' minibidi.c from C to Lua, +-- excluding parts that are of no use to us like shaping. The C code is Copyright (c) 2004 +-- Ahmad Khalifa, and is distributed under the MIT Licence. The full license text can be +-- found at: http://svn.arabeyes.org/viewvc/projects/adawat/minibidi/LICENCE. +-- +-- Comment by Hans Hagen: +-- +-- The initial conversion to Lua has been done by Khaled Hosny. As a first step I optimized the +-- code (to suit today's context mkiv). Next I fixed the foreign object handling, for instance, +-- we can skip over math but we need to inject before the open math node and after the close node, +-- so we need to keep track of the endpoint. After I fixed that bit I realized that it was possible +-- to generalize the object skipper if only because it saves memory (and processing time). The +-- current implementation is about three times as fast (roughly measured) and I can probably squeeze +-- out some more, only to sacrifice some when I start adding features.
A next stage will be to have +-- more granularity in foreign objects. Of course all errors are mine. I also added the usual bit +-- of context tracing and reshuffled some code. A memory optimization is on the agenda (already sort +-- of prepared). It is no longer line by line. +-- +-- The first implementation of bidi in context started out from examples of mixed usage (including +-- more than text) with an at that point bugged r2l support. It has some alternatives for letting +-- the tex markup have a bit higher priority. I will probably add some local (style driven) +-- overrides to the following code as well. It also means that we can selectively enable and disable +-- the parser (because a document wide application might not be what we want). This will bring a +-- slow down but not that much. (I need to check with Idris why we have things like isol there.) +-- +-- We'll probably keep multiple methods around (this is just a side track of improving the already +-- available scanner). I need to look into the changed unicode recommendations anyway as a first +-- impression is that some fuzziness has been removed. I finally need to spend time on those specs. So, +-- there will be a third variant (written from scratch) at some point. The fun about TeX is that we +-- can provide alternative solutions (given that it doesn't bloat the engine!) +-- +-- A test with some hebrew, mixed with hboxes with latin/hebrew and simple math.
In fact this triggered +-- playing with bidi again: +-- +-- 0.11 : nothing +-- 0.14 : 0.03 node list only, one pass +-- 0.23 : 0.12 close to unicode bidi, multipass +-- 0.44 : 0.33 original previous +-- +-- todo: check for introduced errors +-- todo: reuse list, we have size, so we can just change values (and auto allocate when not there) +-- todo: reuse the stack +-- todo: no need for a max check +-- todo: collapse bound similar ranges (not ok yet) +-- tood: combine some sweeps + +local insert, remove, unpack, concat = table.insert, table.remove, table.unpack, table.concat +local utfchar = utf.char +local formatters = string.formatters + +local directiondata = characters.directions +local mirrordata = characters.mirrors + +local remove_node = nodes.remove +local insert_node_after = nodes.insert_after +local insert_node_before = nodes.insert_before + +local nodepool = nodes.pool +local new_textdir = nodepool.textdir + +local nodecodes = nodes.nodecodes +local whatsitcodes = nodes.whatsitcodes +local skipcodes = nodes.skipcodes + +local glyph_code = nodecodes.glyph +local glue_code = nodecodes.glue +local hlist_code = nodecodes.hlist +local vlist_code = nodecodes.vlist +local math_code = nodecodes.math +local whatsit_code = nodecodes.whatsit +local dir_code = whatsitcodes.dir +local localpar_code = whatsitcodes.localpar +local parfillskip_code = skipcodes.skipcodes + +----- object_replacement = 0xFFFC -- object replacement character +local maximum_stack = 60 -- probably spec but not needed + +local setcolor = nodes.tracers.colors.set +local resetcolor = nodes.tracers.colors.reset + +local directions = typesetters.directions + +local a_directions = attributes.private('directions') + +local remove_controls = true directives.register("typesetters.directions.one.removecontrols",function(v) remove_controls = v end) + +local trace_directions = false trackers .register("typesetters.directions.one", function(v) trace_directions = v end) +local trace_details = false trackers 
.register("typesetters.directions.one.details", function(v) trace_details = v end) + +local report_directions = logs.reporter("typesetting","directions one") + +local whitespace = { + lre = true, + rle = true, + lro = true, + rlo = true, + pdf = true, + bn = true, + ws = true, +} + +local b_s_ws_on = { + b = true, + s = true, + ws = true, + on = true +} + +-- tracing + +local function show_list(list,size,what) + local what = what or "direction" + local joiner = utfchar(0x200C) + local result = { } + for i=1,size do + local entry = list[i] + local character = entry.char + local direction = entry[what] + if character == 0xFFFC then + local first = entry.id + local last = entry.last + local skip = entry.skip + if last then + result[i] = formatters["%-3s:%s %s..%s (%i)"](direction,joiner,nodecodes[first],nodecodes[last],skip or 0) + else + result[i] = formatters["%-3s:%s %s (%i)"](direction,joiner,nodecodes[first],skip or 0) + end + elseif character >= 0x202A and character <= 0x202C then + result[i] = formatters["%-3s:%s %U"](direction,joiner,character) + else + result[i] = formatters["%-3s:%s %c %U"](direction,joiner,character,character) + end + end + return concat(result,joiner .. " | " .. 
joiner) +end + +-- preparation + +local function show_done(list,size) + local joiner = utfchar(0x200C) + local result = { } + for i=1,size do + local entry = list[i] + local character = entry.char + local begindir = entry.begindir + local enddir = entry.enddir + if begindir then + result[#result+1] = formatters["<%s>"](begindir) + end + if entry.remove then + -- continue + elseif character == 0xFFFC then + result[#result+1] = formatters["<%s>"]("?") + elseif character == 0x0020 then + result[#result+1] = formatters["<%s>"](" ") + elseif character >= 0x202A and character <= 0x202C then + result[#result+1] = formatters["<%s>"](entry.original) + else + result[#result+1] = utfchar(character) + end + if enddir then + result[#result+1] = formatters["<%s>"](enddir) + end + end + return concat(result,joiner) +end + +-- keeping the list and overwriting doesn't save much runtime, only a few percent +-- char is only used for mirror, so in fact we can as well only store it for +-- glyphs only + +local function build_list(head) -- todo: store node pointer ... 
saves loop + -- P1 + local current = head + local list = { } + local size = 0 + while current do + size = size + 1 + local id = current.id + if id == glyph_code then + local chr = current.char + local dir = directiondata[chr] + list[size] = { char = chr, direction = dir, original = dir, level = 0 } + current = current.next + elseif id == glue_code then + list[size] = { char = 0x0020, direction = "ws", original = "ws", level = 0 } + current = current.next + elseif id == whatsit_code and current.subtype == dir_code then + local dir = current.dir + if dir == "+TLT" then + list[size] = { char = 0x202A, direction = "lre", original = "lre", level = 0 } + elseif dir == "+TRT" then + list[size] = { char = 0x202B, direction = "rle", original = "rle", level = 0 } + elseif dir == "-TLT" or dir == "-TRT" then + list[size] = { char = 0x202C, direction = "pdf", original = "pdf", level = 0 } + else + list[size] = { char = 0xFFFC, direction = "on", original = "on", level = 0, id = id } -- object replacement character + end + current = current.next + elseif id == math_code then + local skip = 0 + current = current.next + while current.id ~= math_code do + skip = skip + 1 + current = current.next + end + skip = skip + 1 + current = current.next + list[size] = { char = 0xFFFC, direction = "on", original = "on", level = 0, skip = skip, id = id } + else + local skip = 0 + local last = id + current = current.next + while n do + local id = current.id + if id ~= glyph_code and id ~= glue_code and not (id == whatsit_code and current.subtype == dir_code) then + skip = skip + 1 + last = id + current = current.next + else + break + end + end + if id == last then + list[size] = { char = 0xFFFC, direction = "on", original = "on", level = 0, skip = skip, id = id } + else + list[size] = { char = 0xFFFC, direction = "on", original = "on", level = 0, skip = skip, id = id, last = last } + end + end + end + return list, size +end + +-- the action + +-- local function 
find_run_limit_et(list,run_start,limit) +-- local run_limit = run_start +-- local i = run_start +-- while i <= limit and list[i].direction == "et" do +-- run_limit = i +-- i = i + 1 +-- end +-- return run_limit +-- end + +local function find_run_limit_et(list,start,limit) -- returns last match + for i=start,limit do + if list[i].direction == "et" then + start = i + else + return start + end + end + return start +end + +-- local function find_run_limit_b_s_ws_on(list,run_start,limit) +-- local run_limit = run_start +-- local i = run_start +-- while i <= limit and b_s_ws_on[list[i].direction] do +-- run_limit = i +-- i = i + 1 +-- end +-- return run_limit +-- end + +local function find_run_limit_b_s_ws_on(list,start,limit) + for i=start,limit do + if b_s_ws_on[list[i].direction] then + start = i + else + return start + end + end + return start +end + +-- directions.maindir = "r2l" + +local function get_baselevel(head,list,size) -- todo: skip if first is object (or pass head and test for local_par) + local maindir = directions.maindir + if maindir == "r2l" then + return 1, "TRT", false + elseif maindir == "l2r" then + return 0, "TLT", false + elseif head.id == whatsit_code and head.subtype == localpar_code then + if head.dir == "TRT" then + return 1, "TRT", true + else + return 0, "TLT", true + end + else + -- P2, P3 + for i=1,size do + local entry = list[i] + local direction = entry.direction + if direction == "r" or direction == "al" then + return 1, "TRT", true + elseif direction == "l" then + return 0, "TLT", true + end + end + return 0, "TLT", false + end +end + +local function resolve_explicit(list,size,baselevel) + -- X1 + local level = baselevel + local override = "on" + local stack = { } + local nofstack = 0 + for i=1,size do + local entry = list[i] + local direction = entry.direction + -- X2 + if direction == "rle" then + if nofstack < maximum_stack then + nofstack = nofstack + 1 + stack[nofstack] = { level, override } + level = level + (level % 2 == 1 and 2 
or 1) -- least_greater_odd(level) + override = "on" + entry.level = level + entry.direction = "bn" + entry.remove = true + elseif trace_directions then + report_directions("stack overflow at position %a with direction %a",i,direction) + end + -- X3 + elseif direction == "lre" then + if nofstack < maximum_stack then + nofstack = nofstack + 1 + stack[nofstack] = { level, override } + level = level + (level % 2 == 1 and 1 or 2) -- least_greater_even(level) + override = "on" + entry.level = level + entry.direction = "bn" + entry.remove = true + elseif trace_directions then + report_directions("stack overflow at position %a with direction %a",i,direction) + end + -- X4 + elseif direction == "rlo" then + if nofstack < maximum_stack then + nofstack = nofstack + 1 + stack[nofstack] = { level, override } + level = level + (level % 2 == 1 and 2 or 1) -- least_greater_odd(level) + override = "r" + entry.level = level + entry.direction = "bn" + entry.remove = true + elseif trace_directions then + report_directions("stack overflow at position %a with direction %a",i,direction) + end + -- X5 + elseif direction == "lro" then + if nofstack < maximum_stack then + nofstack = nofstack + 1 + stack[nofstack] = { level, override } + level = level + (level % 2 == 1 and 1 or 2) -- least_greater_even(level) + override = "l" + entry.level = level + entry.direction = "bn" + entry.remove = true + elseif trace_directions then + report_directions("stack overflow at position %a with direction %a",i,direction) + end + -- X7 + elseif direction == "pdf" then + if nofstack < maximum_stack then + local stacktop = stack[nofstack] + nofstack = nofstack - 1 + level = stacktop[1] + override = stacktop[2] + entry.level = level + entry.direction = "bn" + entry.remove = true + elseif trace_directions then + report_directions("stack overflow at position %a with direction %a",i,direction) + end + -- X6 + else + entry.level = level + if override ~= "on" then + entry.direction = override + end + end + end + -- 
X8 (reset states and overrides after paragraph) +end + +local function resolve_weak(list,size,start,limit,sor,eor) + -- W1 + for i=start,limit do + local entry = list[i] + if entry.direction == "nsm" then + if i == start then + entry.direction = sor + else + entry.direction = list[i-1].direction + end + end + end + -- W2 + for i=start,limit do + local entry = list[i] + if entry.direction == "en" then + for j=i-1,start,-1 do + local prev = list[j] + local direction = prev.direction + if direction == "al" then + entry.direction = "an" + break + elseif direction == "r" or direction == "l" then + break + end + end + end + end + -- W3 + for i=start,limit do + local entry = list[i] + if entry.direction == "al" then + entry.direction = "r" + end + end + -- W4 + for i=start+1,limit-1 do + local entry = list[i] + local direction = entry.direction + if direction == "es" then + if list[i-1].direction == "en" and list[i+1].direction == "en" then + entry.direction = "en" + end + elseif direction == "cs" then + local prevdirection = list[i-1].direction + if prevdirection == "en" then + if list[i+1].direction == "en" then + entry.direction = "en" + end + elseif prevdirection == "an" and list[i+1].direction == "an" then + entry.direction = "an" + end + end + end + -- W5 + local i = start + while i <= limit do + if list[i].direction == "et" then + local runstart = i + local runlimit = find_run_limit_et(list,runstart,limit) -- when moved inline we can probably collapse a lot + local rundirection = runstart == start and sor or list[runstart-1].direction + if rundirection ~= "en" then + rundirection = runlimit == limit and eor or list[runlimit+1].direction + end + if rundirection == "en" then + for j=runstart,runlimit do + list[j].direction = "en" + end + end + i = runlimit + end + i = i + 1 + end + -- W6 + for i=start,limit do + local entry = list[i] + local direction = entry.direction + if direction == "es" or direction == "et" or direction == "cs" then + entry.direction = "on" + 
end + end + -- W7 + for i=start,limit do + local entry = list[i] + if entry.direction == "en" then + local prev_strong = sor + for j=i-1,start,-1 do + local direction = list[j].direction + if direction == "l" or direction == "r" then + prev_strong = direction + break + end + end + if prev_strong == "l" then + entry.direction = "l" + end + end + end +end + +local function resolve_neutral(list,size,start,limit,sor,eor) + -- N1, N2 + for i=start,limit do + local entry = list[i] + if b_s_ws_on[entry.direction] then + local leading_direction, trailing_direction, resolved_direction + local runstart = i + local runlimit = find_run_limit_b_s_ws_on(list,runstart,limit) + if runstart == start then + leading_direction = sor + else + leading_direction = list[runstart-1].direction + if leading_direction == "en" or leading_direction == "an" then + leading_direction = "r" + end + end + if runlimit == limit then + trailing_direction = eor + else + trailing_direction = list[runlimit+1].direction + if trailing_direction == "en" or trailing_direction == "an" then + trailing_direction = "r" + end + end + if leading_direction == trailing_direction then + -- N1 + resolved_direction = leading_direction + else + -- N2 / does the weird period + resolved_direction = entry.level % 2 == 1 and "r" or "l" -- direction_of_level(entry.level) + end + for j=runstart,runlimit do + list[j].direction = resolved_direction + end + i = runlimit + end + i = i + 1 + end +end + +local function resolve_implicit(list,size,start,limit,sor,eor) + -- I1 + for i=start,limit do + local entry = list[i] + local level = entry.level + if level % 2 ~= 1 then -- not odd(level) + local direction = entry.direction + if direction == "r" then + entry.level = level + 1 + elseif direction == "an" or direction == "en" then + entry.level = level + 2 + end + end + end + -- I2 + for i=start,limit do + local entry = list[i] + local level = entry.level + if level % 2 == 1 then -- odd(level) + local direction = entry.direction + if 
direction == "l" or direction == "en" or direction == "an" then + entry.level = level + 1 + end + end + end +end + +local function resolve_levels(list,size,baselevel) + -- X10 + local start = 1 + while start < size do + local level = list[start].level + local limit = start + 1 + while limit < size and list[limit].level == level do + limit = limit + 1 + end + local prev_level = start == 1 and baselevel or list[start-1].level + local next_level = limit == size and baselevel or list[limit+1].level + local sor = (level > prev_level and level or prev_level) % 2 == 1 and "r" or "l" -- direction_of_level(max(level,prev_level)) + local eor = (level > next_level and level or next_level) % 2 == 1 and "r" or "l" -- direction_of_level(max(level,next_level)) + -- W1 .. W7 + resolve_weak(list,size,start,limit,sor,eor) + -- N1 .. N2 + resolve_neutral(list,size,start,limit,sor,eor) + -- I1 .. I2 + resolve_implicit(list,size,start,limit,sor,eor) + start = limit + end + -- L1 + for i=1,size do + local entry = list[i] + local direction = entry.original + -- (1) + if direction == "s" or direction == "b" then + entry.level = baselevel + -- (2) + for j=i-1,1,-1 do + local entry = list[j] + if whitespace[entry.original] then + entry.level = baselevel + else + break + end + end + end + end + -- (3) + for i=size,1,-1 do + local entry = list[i] + if whitespace[entry.original] then + entry.level = baselevel + else + break + end + end + -- L4 + for i=1,size do + local entry = list[i] + if entry.level % 2 == 1 then -- odd(entry.level) + local mirror = mirrordata[entry.char] + if mirror then + entry.mirror = mirror + end + end + end +end + +local function insert_dir_points(list,size) + -- L2, but no actual reversion is done, we simply annotate where + -- begindir/endddir node will be inserted. 
+ local maxlevel = 0 + local finaldir = false + for i=1,size do + local level = list[i].level + if level > maxlevel then + maxlevel = level + end + end + for level=0,maxlevel do + local started = false + local begindir = nil + local enddir = nil + if level % 2 == 1 then + begindir = "+TRT" + enddir = "-TRT" + else + begindir = "+TLT" + enddir = "-TLT" + end + for i=1,size do + local entry = list[i] + if entry.level >= level then + if not started then + entry.begindir = begindir + started = true + end + else + if started then + list[i-1].enddir = enddir + started = false + end + end + end + -- make sure to close the run at end of line + if started then + finaldir = enddir + end + end + if finaldir then + list[size].enddir = finaldir + end +end + +local function apply_to_list(list,size,head,pardir) + local index = 1 + local current = head + local done = false + while current do + if index > size then + report_directions("fatal error, size mismatch") + break + end + local id = current.id + local entry = list[index] + local begindir = entry.begindir + local enddir = entry.enddir + if id == glyph_code then + local mirror = entry.mirror + if mirror then + current.char = mirror + end + if trace_directions then + local original = entry.original + local direction = entry.direction + if mirror then + setcolor(current,"trace:dc") + elseif direction == "l" then + if original == direction then + setcolor(current,"trace:dr") + else + setcolor(current,"trace:dm") + end + elseif direction == "r" then + if original == direction then + setcolor(current,"trace:db") + else + setcolor(current,"trace:dg") + end + else + resetcolor(current) + end + end + elseif id == hlist_code or id == vlist_code then + -- current.list = process(current.list) -- not needed + current.dir = pardir -- is this really needed? 
+ elseif id == glue_code then + if enddir and current.subtype == parfillskip_code then + -- insert the last enddir before \parfillskip glue + head = insert_node_before(head,current,new_textdir(enddir)) + enddir = false + done = true + end + elseif id == whatsit_code then + if begindir and current.subtype == localpar_code then + -- local_par should always be the 1st node + head, current = insert_node_after(head,current,new_textdir(begindir)) + begindir = nil + done = true + end + end + if begindir then + head = insert_node_before(head,current,new_textdir(begindir)) + done = true + end + local skip = entry.skip + if skip and skip > 0 then + for i=1,skip do + current = current.next + end + end + if enddir then + head, current = insert_node_after(head,current,new_textdir(enddir)) + done = true + end + if not entry.remove then + current = current.next + elseif remove_controls then + -- X9 + head, current = remove_node(head,current,true) + done = true + else + current = current.next + end + index = index + 1 + end + return head, done +end + +local function process(namespace,attribute,head) + local list, size = build_list(head) + local baselevel, pardir, dirfound = get_baselevel(head,list,size) -- we always have an inline dir node in context + if not dirfound and trace_details then + report_directions("no initial direction found, gambling") + end + if trace_details then + report_directions("before : %s",show_list(list,size,"original")) + end + resolve_explicit(list,size,baselevel) + resolve_levels(list,size,baselevel) + insert_dir_points(list,size) + if trace_details then + report_directions("after : %s",show_list(list,size,"direction")) + report_directions("result : %s",show_done(list,size)) + end + head, done = apply_to_list(list,size,head,pardir) + return head, done +end + +directions.installhandler(interfaces.variables.one,process) diff --git a/tex/context/base/typo-ubb.lua b/tex/context/base/typo-ubb.lua new file mode 100644 index 000000000..4cec90084 --- /dev/null 
+++ b/tex/context/base/typo-ubb.lua @@ -0,0 +1,887 @@ +if not modules then modules = { } end modules ['typo-ubb'] = { + version = 1.001, + comment = "companion to typo-dir.mkiv", + author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", + copyright = "PRAGMA ADE / ConTeXt Development Team", + license = "see context related readme files", + comment = "Unicode bidi (sort of) variant b", +} + +-- This is a follow up on typo-uba which itself is a follow up on t-bidi by Khaled Hosny which +-- in turn is based on minibidi.c from Arabeyes. This is a further optimizations, as well as +-- an update on some recent unicode bidi developments. There is (and will) also be more control +-- added. As a consequence this module is somewhat slower than its precursor which itself is +-- slower than the one-pass bidi handler. This is also a playground and I might add some plugin +-- support. + +-- todo (cf html): +-- +-- normal The element does not offer a additional level of embedding with respect to the bidirectional algorithm. For inline elements implicit reordering works across element boundaries. +-- embed If the element is inline, this value opens an additional level of embedding with respect to the bidirectional algorithm. The direction of this embedding level is given by the direction property. +-- bidi-override For inline elements this creates an override. For block container elements this creates an override for inline-level descendants not within another block container element. This means that inside the element, reordering is strictly in sequence according to the direction property; the implicit part of the bidirectional algorithm is ignored. +-- isolate This keyword indicates that the element's container directionality should be calculated without considering the content of this element. The element is therefore isolated from its siblings. 
When applying its bidirectional-resolution algorithm, its container element treats it as one or several U+FFFC Object Replacement Character, i.e. like an image. +-- isolate-override This keyword applies the isolation behavior of the isolate keyword to the surrounding content and the override behavior of the bidi-override keyword to the inner content. +-- plaintext This keyword makes the element's directionality calculated without considering its parent bidirectional state or the value of the direction property. The directionality is calculated using the P2 and P3 rules of the Unicode Bidirectional Algorithm. +-- This value allows displaying data which has already been formatted using a tool following the Unicode Bidirectional Algorithm. +-- +-- todo: check for introduced errors +-- todo: reuse list, we have size, so we can just change values (and auto allocate when not there) +-- todo: reuse the stack +-- todo: no need for a max check +-- todo: collapse bound similar ranges (not ok yet) +-- todo: combine some sweeps +-- todo: add fence parser +-- todo: removing is not needed when we inject at the same spot (only change the dir property) +-- todo: isolated runs (isolating runs are similar to bidi=local in the basic analyzer) + +-- todo: check unicode addenda (from the draft): +-- +-- Added support for canonical equivalents in BD16. +-- Changed logic in N0 to not check forwards for context in the case of enclosed text opposite the embedding direction. +-- Major extension of the algorithm to allow for the implementation of directional isolates and the introduction of new isolate-related values to the Bidi_Class property. +-- Adds BD8, BD9, BD10, BD11, BD12, BD13, BD14, BD15, and BD16, Sections 2.4 and 2.5, and Rules X5a, X5b, X5c and X6a. +-- Extensively revises Section 3.3.2, Explicit Levels and Directions and its existing X rules to formalize the algorithm for matching a PDF with the embedding or override initiator whose scope it terminates. 
-- Moves Rules X9 and X10 into a separate new Section 3.3.3, Preparations for Implicit Processing.
-- Modifies Rule X10 to make the isolating run sequence the unit to which subsequent rules are applied.
-- Modifies Rule W1 to change an NSM preceded by an isolate initiator or PDI into ON.
-- Adds Rule N0 and makes other changes to Section 3.3.5, Resolving Neutral and Isolate Formatting Types to resolve bracket pairs to the same level.
-- Adds the new ARABIC LETTER MARK (U+061C) character to Section 2.6, Implicit Directional Marks and Table 4 Bidirectional Character Types.

-- Localized library functions and character data used throughout this module.

local insert, remove, unpack, concat = table.insert, table.remove, table.unpack, table.concat
local utfchar = utf.char
local formatters = string.formatters

local directiondata = characters.directions
local mirrordata = characters.mirrors
local textclassdata = characters.textclasses

local remove_node = nodes.remove
local insert_node_after = nodes.insert_after
local insert_node_before = nodes.insert_before

local nodepool = nodes.pool
local new_textdir = nodepool.textdir

local nodecodes = nodes.nodecodes
local whatsitcodes = nodes.whatsitcodes
local skipcodes = nodes.skipcodes

local glyph_code = nodecodes.glyph
local glue_code = nodecodes.glue
local hlist_code = nodecodes.hlist
local vlist_code = nodecodes.vlist
local math_code = nodecodes.math
local whatsit_code = nodecodes.whatsit
local dir_code = whatsitcodes.dir
local localpar_code = whatsitcodes.localpar
-- was skipcodes.skipcodes, which is not a glue subtype name and therefore nil;
-- with nil here the "insert enddir before \parfillskip" test could never match
local parfillskip_code = skipcodes.parfillskip

local maximum_stack = 0xFF -- unicode: 60, will be jumped to 125, we don't care too much

local setcolor = nodes.tracers.colors.set
local resetcolor = nodes.tracers.colors.reset

local directions = typesetters.directions
directions.maindir = nil -- not used

local getfences = directions.getfences

local a_directions = attributes.private('directions')
local a_textbidi = attributes.private('textbidi')
local a_state = attributes.private('state')
local s_isol = fonts.analyzers.states.isol

-- current[a_state] = s_isol -- maybe better have a special bidi attr value -> override (9) -> todo

local remove_controls = true directives.register("typesetters.directions.removecontrols",function(v) remove_controls = v end)
----- analyze_fences = true directives.register("typesetters.directions.analyzefences", function(v) analyze_fences = v end)

local trace_directions = false trackers .register("typesetters.directions.two", function(v) trace_directions = v end)
local trace_details = false trackers .register("typesetters.directions.two.details", function(v) trace_details = v end)

local report_directions = logs.reporter("typesetting","directions two")

-- strong (old):
--
-- l   : left to right
-- r   : right to left
-- lro : left to right override
-- rlo : right to left override
-- lre : left to right embedding
-- rle : right to left embedding
-- al  : right to left arabic (esp punctuation issues)

-- weak:
--
-- en  : european number
-- es  : european number separator
-- et  : european number terminator
-- an  : arabic number
-- cs  : common number separator
-- nsm : nonspacing mark
-- bn  : boundary neutral

-- neutral:
--
-- b   : paragraph separator
-- s   : segment separator
-- ws  : whitespace
-- on  : other neutrals

-- interesting: this is indeed better (and more what we expect i.e. we already use this split
-- in the old original (also these isolates)

-- strong (new):
--
-- l   : left to right
-- r   : right to left
-- al  : right to left arabic (esp punctuation issues)

-- explicit: (new)
--
-- lro : left to right override
-- rlo : right to left override
-- lre : left to right embedding
-- rle : right to left embedding
-- pdf : pop dir format
-- lri : left to right isolate
-- rli : right to left isolate
-- fsi : first string isolate
-- pdi : pop directional isolate

-- classes that are reset to the base level by rule L1 (looked up via entry.original)

local whitespace = {
    lre = true,
    rle = true,
    lro = true,
    rlo = true,
    pdf = true,
    bn = true,
    ws = true,
}

-- classes that the neutral resolver (N1, N2) treats as one run

local b_s_ws_on = {
    b = true,
    s = true,
    ws = true,
    on = true
}

-- tracing

-- Renders the list as one string for tracing: per entry the field named by
-- 'what' (defaults to "direction") followed by the character or, for object
-- replacement entries, the node type(s) and skip count.

local function show_list(list,size,what)
    local what = what or "direction"
    local joiner = utfchar(0x200C)
    local result = { }
    for i=1,size do
        local entry = list[i]
        local character = entry.char
        local direction = entry[what]
        if character == 0xFFFC then
            local first = entry.id
            local last = entry.last
            local skip = entry.skip
            if last then
                result[i] = formatters["%-3s:%s %s..%s (%i)"](direction,joiner,nodecodes[first],nodecodes[last],skip or 0)
            else
                result[i] = formatters["%-3s:%s %s (%i)"](direction,joiner,nodecodes[first],skip or 0)
            end
        elseif character >= 0x202A and character <= 0x202C then
            -- explicit controls (LRE, RLE, PDF) are shown by code point only
            result[i] = formatters["%-3s:%s %U"](direction,joiner,character)
        else
            result[i] = formatters["%-3s:%s %c %U"](direction,joiner,character,character)
        end
    end
    return concat(result,joiner .. " | " .. joiner)
end

-- preparation

-- Renders the annotated list for tracing: characters in list order with the
-- begindir/enddir markers that will be injected; entries flagged for removal
-- only contribute their markers.

local function show_done(list,size)
    local joiner = utfchar(0x200C)
    local result = { }
    for i=1,size do
        local entry = list[i]
        local character = entry.char
        local begindir = entry.begindir
        local enddir = entry.enddir
        if begindir then
            result[#result+1] = formatters["<%s>"](begindir)
        end
        if entry.remove then
            -- continue
        elseif character == 0xFFFC then
            result[#result+1] = formatters["<%s>"]("?")
        elseif character == 0x0020 then
            result[#result+1] = formatters["<%s>"](" ")
        elseif character >= 0x202A and character <= 0x202C then
            result[#result+1] = formatters["<%s>"](entry.original)
        else
            result[#result+1] = utfchar(character)
        end
        if enddir then
            result[#result+1] = formatters["<%s>"](enddir)
        end
    end
    return concat(result,joiner)
end

-- keeping the list and overwriting doesn't save much runtime, only a few percent
-- char is only used for mirror, so in fact we can as well only store it for
-- glyphs only

-- Converts the node list into an array of entries (char, direction, original,
-- level and, for object replacements, id/last/skip). Returns the array and its
-- size. An entry with 'skip' stands for 1 + skip consecutive nodes.
--
-- Glyphs, glue and dir whatsits get their own entry. A math_on .. math_off
-- range becomes one neutral entry. Any other node becomes a neutral entry that
-- also absorbs directly following inert nodes.

local function build_list(head) -- todo: store node pointer ... saves loop
    -- P1
    local current = head
    local list = { }
    local size = 0
    while current do
        size = size + 1
        local id = current.id
        if id == glyph_code then
            local chr = current.char
            local dir = directiondata[chr]
            list[size] = { char = chr, direction = dir, original = dir, level = 0 }
            current = current.next
        elseif id == glue_code then
            list[size] = { char = 0x0020, direction = "ws", original = "ws", level = 0 }
            current = current.next
        elseif id == whatsit_code and current.subtype == dir_code then
            local dir = current.dir
            if dir == "+TLT" then
                list[size] = { char = 0x202A, direction = "lre", original = "lre", level = 0 }
            elseif dir == "+TRT" then
                list[size] = { char = 0x202B, direction = "rle", original = "rle", level = 0 }
            elseif dir == "-TLT" or dir == "-TRT" then
                list[size] = { char = 0x202C, direction = "pdf", original = "pdf", level = 0 }
            else
                list[size] = { char = 0xFFFC, direction = "on", original = "on", level = 0, id = id } -- object replacement character
            end
            current = current.next
        elseif id == math_code then
            local skip = 0
            current = current.next
            -- the nil check keeps an unbalanced math node from raising an error
            while current and current.id ~= math_code do
                skip = skip + 1
                current = current.next
            end
            if current then
                -- step over the closing math node
                skip = skip + 1
                current = current.next
            end
            list[size] = { char = 0xFFFC, direction = "on", original = "on", level = 0, skip = skip, id = id }
        else
            local skip = 0
            local last = id
            current = current.next
            -- This loop used to test an undefined global 'n' and so never ran.
            -- Now that it groups, it stops at every node type that is handled
            -- individually when the result is applied (glyph, glue, math, boxes,
            -- dir and local par whatsits), so only inert nodes are absorbed.
            while current do
                local nextid = current.id
                if nextid == glyph_code or nextid == glue_code or nextid == math_code
                        or nextid == hlist_code or nextid == vlist_code then
                    break
                elseif nextid == whatsit_code then
                    local subtype = current.subtype
                    if subtype == dir_code or subtype == localpar_code then
                        break
                    end
                end
                skip = skip + 1
                last = nextid
                current = current.next
            end
            if id == last then
                list[size] = { char = 0xFFFC, direction = "on", original = "on", level = 0, skip = skip, id = id }
            else
                list[size] = { char = 0xFFFC, direction = "on", original = "on", level = 0, skip = skip, id = id, last = last }
            end
        end
    end
    return list, size
end

-- new

-- we could support ( ] and [ ) and such ...
-- ש ) ל ( א 0-0
-- ש ( ל ] א 0-0
-- ש ( ל ) א 2-4
-- ש ( ל [ א ) כ ] 2-6
-- ש ( ל ] א ) כ 2-6
-- ש ( ל ) א ) כ 2-4
-- ש ( ל ( א ) כ 4-6
-- ש ( ל ( א ) כ ) 2-8,4-6
-- ש ( ל [ א ] כ ) 2-8,4-6

-- Marks matching fences in list[start..limit]: every "on" entry that has a
-- mirror gets 'mirror' and 'class' set, and matched open/close entries get
-- 'paired' pointing at each other's index. (Was an accidental global.)

local function resolve_fences(list,size,start,limit)
    -- N0
    local stack = { }
    local top = 0
    for i=start,limit do
        local entry = list[i]
        if entry.direction == "on" then
            local char = entry.char
            local mirror = mirrordata[char]
            if mirror then
                local class = textclassdata[char]
                entry.mirror = mirror
                entry.class = class
                if class == "open" then
                    top = top + 1
                    stack[top] = { mirror, i, false } -- expected closer, open index, close index
                elseif top == 0 then
                    -- skip
                elseif class == "close" then
                    for j=top,1,-1 do
                        top = j
                        local s = stack[j]
                        if s[1] == char and not s[3] then
                            s[3] = i
                            break
                        end
                    end
                end
            end
        end
    end
    for i=1,#stack do
        local s = stack[i]
        if s[3] then
            local open = s[2]
            local close = s[3]
            list[open ].paired = close
            list[close].paired = open
        end
    end
-- inspect(stack)
-- inspect(list)
end

-- the action

-- Returns base level (0 or 1), paragraph direction ("TLT" or "TRT") and whether
-- the direction was found (false when forced by directions.maindir or when
-- falling back to the default).

local function get_baselevel(head,list,size) -- todo: skip if first is object (or pass head and test for local_par)
    local maindir = directions.maindir
    if maindir == "r2l" then
        return 1, "TRT", false
    elseif maindir == "l2r" then
        return 0, "TLT", false
    elseif head.id == whatsit_code and head.subtype == localpar_code then
        if head.dir == "TRT" then
            return 1, "TRT", true
        else
            return 0, "TLT", true
        end
    else
        -- P2, P3
        for i=1,size do
            local entry = list[i]
            local direction = entry.direction
            if direction == "r" or direction == "al" then
                return 1, "TRT", true
            elseif direction == "l" then
                return 0, "TLT", true
            end
        end
        return 0, "TLT", false
    end
end

-- embedding and override initiators: parity of the new level, override they set

local explicit_initiators = {
    rle = { odd = true,  override = "on" }, -- X2
    lre = { odd = false, override = "on" }, -- X3
    rlo = { odd = true,  override = "r"  }, -- X4
    lro = { odd = false, override = "l"  }, -- X5
}

-- Assigns embedding levels (X1 .. X7). Explicit controls become "bn" and are
-- flagged for removal; other entries get the current level and, inside an
-- override, the override direction.

local function resolve_explicit(list,size,baselevel)
    -- X1
    local level = baselevel
    local override = "on"
    local stack = { }
    local nofstack = 0
    for i=1,size do
        local entry = list[i]
        local direction = entry.direction
        local initiator = explicit_initiators[direction]
        if initiator then
            -- X2 .. X5
            if nofstack < maximum_stack then
                nofstack = nofstack + 1
                stack[nofstack] = { level, override }
                if initiator.odd then
                    level = level + (level % 2 == 1 and 2 or 1) -- least_greater_odd(level)
                else
                    level = level + (level % 2 == 1 and 1 or 2) -- least_greater_even(level)
                end
                override = initiator.override
                entry.level = level
                entry.direction = "bn"
                entry.remove = true
            elseif trace_directions then
                report_directions("stack overflow at position %a with direction %a",i,direction)
            end
        elseif direction == "pdf" then
            -- X7: popping needs a non-empty stack. The old test compared against
            -- maximum_stack, so an unmatched pdf indexed a nil stack slot and a
            -- pdf arriving with a full stack was never popped.
            if nofstack > 0 then
                local stacktop = stack[nofstack]
                nofstack = nofstack - 1
                level = stacktop[1]
                override = stacktop[2]
            elseif trace_directions then
                report_directions("stack underflow at position %a with direction %a",i,direction)
            end
            -- matched or not, the control itself is neutralized (X9)
            entry.level = level
            entry.direction = "bn"
            entry.remove = true
        else
            -- X6
            entry.level = level
            if override ~= "on" then
                entry.direction = override
            end
        end
    end
    -- X8 (reset states and overrides after paragraph)
end

-- Resolves weak types (W1 .. W7) in list[start..limit]. 'orderbefore' and
-- 'orderafter' are the directions assumed just outside the run.

local function resolve_weak(list,size,start,limit,orderbefore,orderafter)
    -- W1: non spacing marks get the direction of the previous character
    for i=start,limit do
        local entry = list[i]
        if entry.direction == "nsm" then
            if i == start then
                entry.direction = orderbefore
            else
                entry.direction = list[i-1].direction
            end
        end
    end
    -- W2: mess with numbers and arabic
    for i=start,limit do
        local entry = list[i]
        if entry.direction == "en" then
            for j=i-1,start,-1 do
                local prev = list[j]
                local direction = prev.direction
                if direction == "al" then
                    entry.direction = "an"
                    break
                elseif direction == "r" or direction == "l" then
                    break
                end
            end
        end
    end
    -- W3
    for i=start,limit do
        local entry = list[i]
        if entry.direction == "al" then
            entry.direction = "r"
        end
    end
    -- W4: make separators number
    for i=start+1,limit-1 do
        local entry = list[i]
        local direction = entry.direction
        if direction == "es" then
            if list[i-1].direction == "en" and list[i+1].direction == "en" then
                entry.direction = "en"
            end
        elseif direction == "cs" then
            local prevdirection = list[i-1].direction
            if prevdirection == "en" then
                if list[i+1].direction == "en" then
                    entry.direction = "en"
                end
            elseif prevdirection == "an" and list[i+1].direction == "an" then
                entry.direction = "an"
            end
        end
    end
    -- W5
    local i = start
    while i <= limit do
        if list[i].direction == "et" then
            local runstart = i
            local runlimit = runstart
            for j=runstart,limit do
                if list[j].direction == "et" then
                    runlimit = j
                else
                    break
                end
            end
            -- was 'sor', an undefined global: at the run start this fell through
            -- to list[runstart-1], which is outside the run (or nil at index 1)
            local rundirection = runstart == start and orderbefore or list[runstart-1].direction
            if rundirection ~= "en" then
                rundirection = runlimit == limit and orderafter or list[runlimit+1].direction
            end
            if rundirection == "en" then
                for j=runstart,runlimit do
                    list[j].direction = "en"
                end
            end
            i = runlimit
        end
        i = i + 1
    end
    -- W6
    for i=start,limit do
        local entry = list[i]
        local direction = entry.direction
        if direction == "es" or direction == "et" or direction == "cs" then
            entry.direction = "on"
        end
    end
    -- W7
    for i=start,limit do
        local entry = list[i]
        if entry.direction == "en" then
            local prev_strong = orderbefore
            for j=i-1,start,-1 do
                local direction = list[j].direction
                if direction == "l" or direction == "r" then
                    prev_strong = direction
                    break
                end
            end
            if prev_strong == "l" then
                entry.direction = "l"
            end
        end
    end
end

-- Resolves runs of neutrals (N1, N2) in list[start..limit]. A run takes the
-- direction of its neighbours when both agree (numbers count as "r"),
-- otherwise the direction of the embedding level.

local function resolve_neutral(list,size,start,limit,orderbefore,orderafter)
    -- N1, N2
    -- a while loop, because assigning to a numeric for variable does not skip
    local i = start
    while i <= limit do
        local entry = list[i]
        if b_s_ws_on[entry.direction] then
            local runstart = i
            local runlimit = runstart
            -- the old scan moved runstart instead of runlimit, so runs longer
            -- than one entry were never resolved as a whole
            for j=runstart+1,limit do
                if b_s_ws_on[list[j].direction] then
                    runlimit = j
                else
                    break
                end
            end
            local leading_direction, trailing_direction, resolved_direction
            if runstart == start then
                leading_direction = orderbefore -- was 'sor', an undefined global
            else
                leading_direction = list[runstart-1].direction
                if leading_direction == "en" or leading_direction == "an" then
                    leading_direction = "r"
                end
            end
            if runlimit == limit then
                trailing_direction = orderafter
            else
                trailing_direction = list[runlimit+1].direction
                if trailing_direction == "en" or trailing_direction == "an" then
                    trailing_direction = "r"
                end
            end
            if leading_direction == trailing_direction then
                -- N1
                resolved_direction = leading_direction
            else
                -- N2 / does the weird period
                resolved_direction = entry.level % 2 == 1 and "r" or "l" -- direction_of_level(entry.level)
            end
            for j=runstart,runlimit do
                list[j].direction = resolved_direction
            end
            i = runlimit
        end
        i = i + 1
    end
end

-- Raises levels according to the resolved directions (I1, I2) in list[start..limit].

local function resolve_implicit(list,size,start,limit,orderbefore,orderafter)
    -- I1
    for i=start,limit do
        local entry = list[i]
        local level = entry.level
        if level % 2 ~= 1 then -- not odd(level)
            local direction = entry.direction
            if direction == "r" then
                entry.level = level + 1
            elseif direction == "an" or direction == "en" then
                entry.level = level + 2
            end
        end
    end
    -- I2
    for i=start,limit do
        local entry = list[i]
        local level = entry.level
        if level % 2 == 1 then -- odd(level)
            local direction = entry.direction
            if direction == "l" or direction == "en" or direction == "an" then
                entry.level = level + 1
            end
        end
    end
end

-- Splits the list into level runs (X10) and applies the weak, fence (when
-- analyze_fences is set), neutral and implicit rules to each. After that L1
-- resets separators and surrounding whitespace to the base level and L4
-- decides which entries are mirrored.
--
-- NOTE(review): consecutive runs share their boundary entry (start = limit) and
-- a list of size 1 is not processed by the run loop; kept as is, confirm intent.

local function resolve_levels(list,size,baselevel,analyze_fences)
    -- X10
    local start = 1
    while start < size do
        local level = list[start].level
        local limit = start + 1
        while limit < size and list[limit].level == level do
            limit = limit + 1
        end
        local prev_level = start == 1 and baselevel or list[start-1].level
        local next_level = limit == size and baselevel or list[limit+1].level
        local orderbefore = (level > prev_level and level or prev_level) % 2 == 1 and "r" or "l" -- direction_of_level(max(level,prev_level))
        local orderafter = (level > next_level and level or next_level) % 2 == 1 and "r" or "l" -- direction_of_level(max(level,next_level))
        -- W1 .. W7
        resolve_weak(list,size,start,limit,orderbefore,orderafter)
        -- N0
        if analyze_fences then
            resolve_fences(list,size,start,limit)
        end
        -- N1 .. N2
        resolve_neutral(list,size,start,limit,orderbefore,orderafter)
        -- I1 .. I2
        resolve_implicit(list,size,start,limit,orderbefore,orderafter)
        start = limit
    end
    -- L1
    for i=1,size do
        local entry = list[i]
        local direction = entry.original
        -- (1)
        if direction == "s" or direction == "b" then
            entry.level = baselevel
            -- (2)
            for j=i-1,1,-1 do
                local entry = list[j]
                if whitespace[entry.original] then
                    entry.level = baselevel
                else
                    break
                end
            end
        end
    end
    -- (3)
    for i=size,1,-1 do
        local entry = list[i]
        if whitespace[entry.original] then
            entry.level = baselevel
        else
            break
        end
    end
    -- L4
    if analyze_fences then
        -- mirrors were already set by the fence pass: keep them only for
        -- paired fences at an odd level
        for i=1,size do
            local entry = list[i]
            if entry.level % 2 == 1 then -- odd(entry.level)
                if entry.mirror and not entry.paired then
                    entry.mirror = false
                end
                -- okay
            elseif entry.mirror then
                entry.mirror = false
            end
        end
    else
        for i=1,size do
            local entry = list[i]
            if entry.level % 2 == 1 then -- odd(entry.level)
                local mirror = mirrordata[entry.char]
                if mirror then
                    entry.mirror = mirror
                end
            end
        end
    end
end

-- Annotates entries with 'begindir' / 'enddir' strings for every level from 0
-- up to the highest level in the list.

local function insert_dir_points(list,size)
    -- L2, but no actual reversion is done, we simply annotate where
    -- begindir/enddir node will be inserted.
    local maxlevel = 0
    local finaldir = false
    for i=1,size do
        local level = list[i].level
        if level > maxlevel then
            maxlevel = level
        end
    end
    for level=0,maxlevel do
        local started = false
        local begindir = nil
        local enddir = nil
        if level % 2 == 1 then
            begindir = "+TRT"
            enddir = "-TRT"
        else
            begindir = "+TLT"
            enddir = "-TLT"
        end
        for i=1,size do
            local entry = list[i]
            if entry.level >= level then
                if not started then
                    entry.begindir = begindir
                    started = true
                end
            else
                if started then
                    list[i-1].enddir = enddir
                    started = false
                end
            end
        end
        -- make sure to close the run at end of line
        if started then
            finaldir = enddir
        end
    end
    if finaldir then
        list[size].enddir = finaldir
    end
end

-- Walks the node list in step with the entry list: applies mirrors, injects the
-- annotated textdir nodes and removes explicit controls when remove_controls
-- is set. Returns the (possibly new) head and whether anything was changed.

local function apply_to_list(list,size,head,pardir)
    local index = 1
    local current = head
    local done = false
    while current do
        if index > size then
            report_directions("fatal error, size mismatch")
            break
        end
        local id = current.id
        local entry = list[index]
        local begindir = entry.begindir
        local enddir = entry.enddir
        if id == glyph_code then
            local mirror = entry.mirror
            if mirror then
                current.char = mirror
            end
            if trace_directions then
                local original = entry.original
                local direction = entry.direction
                if mirror then
                    setcolor(current,"trace:dc")
                elseif direction == "l" then
                    if original == direction then
                        setcolor(current,"trace:dr")
                    else
                        setcolor(current,"trace:dm")
                    end
                elseif direction == "r" then
                    if original == direction then
                        setcolor(current,"trace:db")
                    else
                        setcolor(current,"trace:dg")
                    end
                else
                    resetcolor(current)
                end
            end
        elseif id == hlist_code or id == vlist_code then
            -- current.list = process(current.list) -- not needed
            current.dir = pardir -- is this really needed?
        elseif id == glue_code then
            if enddir and current.subtype == parfillskip_code then
                -- insert the last enddir before \parfillskip glue
                head = insert_node_before(head,current,new_textdir(enddir))
                enddir = false
                done = true
            end
        elseif id == whatsit_code then
            if begindir and current.subtype == localpar_code then
                -- local_par should always be the 1st node
                head, current = insert_node_after(head,current,new_textdir(begindir))
                begindir = nil
                done = true
            end
        end
        if begindir then
            head = insert_node_before(head,current,new_textdir(begindir))
            done = true
        end
        -- an entry can stand for several nodes: move to the last one it covers
        local skip = entry.skip
        if skip and skip > 0 then
            for i=1,skip do
                current = current.next
            end
        end
        if enddir then
            head, current = insert_node_after(head,current,new_textdir(enddir))
            done = true
        end
        if not entry.remove then
            current = current.next
        elseif remove_controls then
            -- X9
            head, current = remove_node(head,current,true)
            done = true
        else
            current = current.next
        end
        index = index + 1
    end
    return head, done
end

-- Handler installed for bidi method "two": analyzes the node list starting at
-- head and returns the processed head plus a flag telling whether it changed.

local function process(namespace,attribute,head)
    -- for the moment a whole paragraph property
    local attr = head[a_directions]
    local analyze_fences = getfences(attr)
    --
    local list, size = build_list(head)
    local baselevel, pardir, dirfound = get_baselevel(head,list,size) -- we always have an inline dir node in context
    if not dirfound and trace_details then
        report_directions("no initial direction found, gambling")
    end
    if trace_details then
        report_directions("before : %s",show_list(list,size,"original"))
    end
    resolve_explicit(list,size,baselevel)
    resolve_levels(list,size,baselevel,analyze_fences)
    insert_dir_points(list,size)
    if trace_details then
        report_directions("after  : %s",show_list(list,size,"direction"))
        report_directions("result : %s",show_done(list,size))
    end
    -- returned directly: the old code assigned the flag to a global 'done'
    return apply_to_list(list,size,head,pardir)
end

+directions.installhandler(interfaces.variables.two,process) diff --git a/tex/context/base/util-prs.lua b/tex/context/base/util-prs.lua index 7fe1e703b..7a8c3ce39 100644 --- a/tex/context/base/util-prs.lua +++ b/tex/context/base/util-prs.lua @@ -261,6 +261,16 @@ function parsers.simple_hash_to_string(h, separator) return concat(t,separator or ",") end +-- for mtx-context etc: aaaa bbbb cccc=dddd eeee=ffff + +local str = C((1-whitespace-equal)^1) +local setting = Cf( Carg(1) * (whitespace^0 * Cg(str * whitespace^0 * (equal * whitespace^0 * str + Cc(""))))^1,rawset) +local splitter = setting^1 + +function utilities.parsers.options_to_hash(str,target) + return str and lpegmatch(splitter,str,1,target or { }) or { } +end + -- for chem (currently one level) local value = P(lbrace * C((nobrace + nestedbraces)^0) * rbrace) @@ -569,7 +579,7 @@ local function fetch(t,name) return t[name] or { } end -function process(result,more) +local function process(result,more) for k, v in next, more do result[k] = v end diff --git a/tex/generic/context/luatex/luatex-fonts-merged.lua b/tex/generic/context/luatex/luatex-fonts-merged.lua index 95dd2231a..4a51d33e1 100644 --- a/tex/generic/context/luatex/luatex-fonts-merged.lua +++ b/tex/generic/context/luatex/luatex-fonts-merged.lua @@ -1,6 +1,6 @@ -- merged file : luatex-fonts-merged.lua -- parent file : luatex-fonts.lua --- merge date : 09/03/13 10:22:07 +-- merge date : 09/07/13 13:50:00 do -- begin closure to overcome local limits and interference -- cgit v1.2.3