33 files changed, 7838 insertions, 431 deletions
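Most of the prose changes in this commit extend the LuaMetaTeX manual around the reworked lua and status interfaces (lua.getversion, lua.getstartupfile, lua.setbytecode, lua.callbytecode, status.list). As a quick orientation before the hunks themselves, here is a minimal sketch of those calls; it assumes a LuaMetaTeX/ConTeXt run (a \startluacode block), the names are taken from the manual text in the diff below rather than verified against the engine, and the register number and file name in the commented lines are purely illustrative.

\startluacode
-- Sketch only: calls as documented in the manual hunks of this patch.
print("lua version:",  lua.getversion())              -- interpreter version string
print("startup file:", lua.getstartupfile() or "none") -- startup file, if any was used

-- Storing a file in a bytecode register and calling it later (chunks must be
-- upvalue-free); register 250 and "somelib.lua" are hypothetical examples:
-- lua.setbytecode(250, loadfile("somelib.lua"))
-- lua.callbytecode(250)

-- status.list() returns a snapshot table; pairs works on that snapshot.
for k, v in pairs(status.list()) do
    if type(v) ~= "table" then
        print(k, v) -- toplevel statistics; subtables group related counters
    end
end
\stopluacode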
diff --git a/doc/context/documents/general/manuals/luametatex.pdf b/doc/context/documents/general/manuals/luametatex.pdf Binary files differindex b53a0cf9f..e71e05d2d 100644 --- a/doc/context/documents/general/manuals/luametatex.pdf +++ b/doc/context/documents/general/manuals/luametatex.pdf diff --git a/doc/context/sources/general/manuals/luametatex/luametatex-codes.tex b/doc/context/sources/general/manuals/luametatex/luametatex-codes.tex new file mode 100644 index 000000000..103bb2473 --- /dev/null +++ b/doc/context/sources/general/manuals/luametatex/luametatex-codes.tex @@ -0,0 +1,22 @@ +\environment luametatex-style + +\startcomponent luametatex-codes + +\startchapter[title=Primitive codes] + +here follows a list with all primitives and their category is shown. When the +engine starts up in ini mode all primitives get defined along with some +properties that makes it possible to do a reverse lookup of a combination of +command code and char code. But, a primitive, being also a regular command can be +redefined later on. The table below shows the original pairs but in \CONTEXT\ +some of these primitives are redefined. However, any macro that fits a command +and char pair is (reported as) a primitive in logs and error messages. In the end +all tokens are such a combination, The first 16 command codes are reserved for +characters (the whole \UNICODE\ range can be used as char code) with specific +catcodes and not mentioned in the list. + +\ctxlua{document.allprimitives()} + +\stopchapter + +\stopcomponent diff --git a/doc/context/sources/general/manuals/luametatex/luametatex-style.tex b/doc/context/sources/general/manuals/luametatex/luametatex-style.tex index 4d3ef36e3..1b4a8d5a1 100644 --- a/doc/context/sources/general/manuals/luametatex/luametatex-style.tex +++ b/doc/context/sources/general/manuals/luametatex/luametatex-style.tex @@ -115,11 +115,11 @@ \stopluacode -\unexpanded\def\showfields #1{\ctxlua{document.functions.showfields("#1")}} -\unexpanded\def\showid #1{\ctxlua{document.functions.showid("#1")}} -\unexpanded\def\showsubtypes#1{\ctxlua{document.functions.showsubtypes("#1")}} -\unexpanded\def\showvalues #1{\ctxlua{document.functions.showvalues(node.values("#1"))}} -\unexpanded\def\showtypes {\ctxlua{document.functions.showvalues(node.types())}} +\protected\def\showfields #1{\ctxlua{document.functions.showfields("#1")}} +\protected\def\showid #1{\ctxlua{document.functions.showid("#1")}} +\protected\def\showsubtypes#1{\ctxlua{document.functions.showsubtypes("#1")}} +\protected\def\showvalues #1{\ctxlua{document.functions.showvalues(node.values("#1"))}} +\protected\def\showtypes {\ctxlua{document.functions.showvalues(node.types())}} \definecolor[blue] [b=.5] @@ -320,13 +320,13 @@ {\strut\useMPgraphic{luanumber}} \stopsetups -\unexpanded\def\nonterminal#1>{\mathematics{\langle\hbox{\rm #1}\rangle}} +\protected\def\nonterminal#1>{\mathematics{\langle\hbox{\rm #1}\rangle}} % taco's brainwave -) .. 
todo: create a typing variant so that we can avoid the !crlf \newcatcodetable\syntaxcodetable -\unexpanded\def\makesyntaxcodetable +\protected\def\makesyntaxcodetable {\begingroup \catcode`\<=13 \catcode`\|=12 \catcode`\!= 0 \catcode`\\=12 @@ -335,11 +335,11 @@ \makesyntaxcodetable -\unexpanded\def\startsyntax {\begingroup\catcodetable\syntaxcodetable \dostartsyntax} -\unexpanded\def\syntax {\begingroup\catcodetable\syntaxcodetable \dosyntax} +\protected\def\startsyntax {\begingroup\catcodetable\syntaxcodetable \dostartsyntax} +\protected\def\syntax {\begingroup\catcodetable\syntaxcodetable \dosyntax} \let\stopsyntax \relax -\unexpanded\def\syntaxenvbody#1% +\protected\def\syntaxenvbody#1% {\par \tt \startnarrower @@ -348,7 +348,7 @@ \stopnarrower \par} -\unexpanded\def\syntaxbody#1% +\protected\def\syntaxbody#1% {\begingroup % \maincolor \tt #1% @@ -433,13 +433,13 @@ \defineregister[nodeindex] \defineregister[libraryindex] -\unexpanded\def\lpr#1{\doifmode{*bodypart}{\primitiveindex[#1]{\bf\tex {#1}}}\tex {#1}} -\unexpanded\def\prm#1{\doifmode{*bodypart}{\primitiveindex[#1]{\tex {#1}}}\tex {#1}} -\unexpanded\def\orm#1{\doifmode{*bodypart}{\primitiveindex[#1]{\tex {#1}}}\tex {#1}} -\unexpanded\def\cbk#1{\doifmode{*bodypart}{\callbackindex [#1]{\type {#1}}}\type{#1}} -\unexpanded\def\nod#1{\doifmode{*bodypart}{\nodeindex [#1]{\bf\type{#1}}}\type{#1}} -\unexpanded\def\whs#1{\doifmode{*bodypart}{\nodeindex [#1]{\type {#1}}}\type{#1}} -\unexpanded\def\noa#1{\doifmode{*bodypart}{\nodeindex [#1]{\type {#1}}}\type{#1}} +\protected\def\lpr#1{\doifmode{*bodypart}{\primitiveindex[#1]{\bf\tex {#1}}}\tex {#1}} +\protected\def\prm#1{\doifmode{*bodypart}{\primitiveindex[#1]{\tex {#1}}}\tex {#1}} +\protected\def\orm#1{\doifmode{*bodypart}{\primitiveindex[#1]{\tex {#1}}}\tex {#1}} +\protected\def\cbk#1{\doifmode{*bodypart}{\callbackindex [#1]{\type {#1}}}\type{#1}} +\protected\def\nod#1{\doifmode{*bodypart}{\nodeindex [#1]{\bf\type{#1}}}\type{#1}} +\protected\def\whs#1{\doifmode{*bodypart}{\nodeindex [#1]{\type {#1}}}\type{#1}} +\protected\def\noa#1{\doifmode{*bodypart}{\nodeindex [#1]{\type {#1}}}\type{#1}} \hyphenation{sub-nodes} @@ -463,14 +463,14 @@ [2] [textstyle=\tttf] -\unexpanded\def\lib #1{\doifmode{*bodypart}{\expanded{\libraryindex{\currentlibraryindex+#1}}}\type{\currentlibraryindex.#1}} -\unexpanded\def\libindex#1{\doifmode{*bodypart}{\expanded{\libraryindex{\currentlibraryindex+#1}}}} -\unexpanded\def\libidx#1#2{\doifmode{*bodypart}{\expanded{\libraryindex{#1+#2}}\type{#1.#2}}} -\unexpanded\def\lix #1#2{\doifmode{*bodypart}{\expanded{\libraryindex{#1+#2}}}} +\protected\def\lib #1{\doifmode{*bodypart}{\expanded{\libraryindex{\currentlibraryindex+#1}}}\type{\currentlibraryindex.#1}} +\protected\def\libindex#1{\doifmode{*bodypart}{\expanded{\libraryindex{\currentlibraryindex+#1}}}} +\protected\def\libidx#1#2{\doifmode{*bodypart}{\expanded{\libraryindex{#1+#2}}\type{#1.#2}}} +\protected\def\lix #1#2{\doifmode{*bodypart}{\expanded{\libraryindex{#1+#2}}}} % \setstructurepageregister[][keys:1=,entries:1=] -\unexpanded\def\inlineluavalue#1% +\protected\def\inlineluavalue#1% {{\maincolor \ctxlua { local t = #1 if type(t) == "table" then @@ -481,4 +481,74 @@ context.typ(t) }}} +% Common: + +% Added December 9 2020 after being energized by Becca Stevens's Slow Burn music +% video: interesting what comes out of top musicians working remote. 
+ +\startluacode +local list = token.getprimitives() + +-- redo this: + +function document.showprimitives(tag) + local t = tex.extraprimitives(tag) + table.sort(t) + for i=1,#t do + local v = t[i] + if v ~= ' ' and v ~= "/" and v ~= "-" then + context.type(v) + context.space() + end + end +end + +-- inspect(tokens.commands) + +function document.filteredprimitives(cmd) + local t = { } + local c = tokens.commands[cmd] + for i=1,#list do + local l = list[i] + if l[1] == c then + t[#t+1] = l[3] + end + end + table.sort(t) + for i=1,#t do + if i > 1 then + context(", ") + elseif i == #t then + context(" and ") + end + context.typ(t[i]) + end +end + +function document.allprimitives() + local c = tokens.commands + local o = tex.getprimitiveorigins() + table.sort(list, function(a,b) + return a[3] < b[3] + end) + context.starttabulate { "|T|T|Tc|T|" } + context.DB() context("primitive") + context.BC() context("command code") + context.BC() context("char code") + context.BC() context("origin") + context.NC() context.NR() + context.TB() + for i=1,#list do + local l = list[i] + context.NC() context.tex(l[3]) + context.NC() context(c[l[1]]) + context.NC() context(l[2]) + context.NC() context(o[l[4]]) + context.NC() context.NR() + end + context.LL() + context.stoptabulate() +end +\stopluacode + \stopenvironment diff --git a/doc/context/sources/general/manuals/luametatex/luametatex-tex.tex b/doc/context/sources/general/manuals/luametatex/luametatex-tex.tex index 0ef178526..e19e7b8fb 100644 --- a/doc/context/sources/general/manuals/luametatex/luametatex-tex.tex +++ b/doc/context/sources/general/manuals/luametatex/luametatex-tex.tex @@ -4,6 +4,16 @@ \startcomponent luametatex-tex +% { "getbytecode", lualib_get_bytecode }, +% { "setbytecode", lualib_set_bytecode }, +% { "callbytecode", lualib_call_bytecode }, +% { "getfunctionstable", lualib_get_functions_table }, +% { "getstartupfile", lualib_get_startupfile }, +% { "getversion", lualib_get_version }, +% { "setexitcode", lualib_set_exitcode }, +% { NULL, NULL }, + + \startchapter[reference=tex,title={The \TEX\ related libraries}] \startsection[title={The \type {lua} library}][library=lua] @@ -11,21 +21,28 @@ \startsubsection[title={Version information}] \topicindex{libraries+\type{lua}} -\topicindex{version} -\topicindex{startupfile} +\topicindex{getversion} +\topicindex{getstartupfile} -\libindex{version} -\libindex{startupfile} +\libindex{getversion} +\libindex{getstartupfile} -This library contains two read|-|only items: +This version of the used \LUA\ interpreter (currently {\tttf \cldcontext +{lua.getversion()}}) can be queried with: \starttyping -<string> v = lua.version -<string> s = lua.startupfile +<string> v = lua.getversion() \stoptyping -This returns the \LUA\ version identifier string. The value currently is -\directlua {tex.print(lua.version)}. +The name of used startup file, if at all, is returned by: + +\starttyping +<string> s = lua.getstartupfile() +\stoptyping + +For this document the reported value is: + +\blank {\ttx \cldcontext {lua.getstartupfile()}} \blank \stopsubsection @@ -60,6 +77,7 @@ This preallocates 100 hash entries and 5000 index entries. The \type \libindex{bytecode} \libindex{setbytecode} \libindex{getbytecode} +\libindex{callbytecode} \LUA\ registers can be used to store \LUA\ code chunks. The accepted values for assignments are functions and \type {nil}. Likewise, the retrieved value is @@ -67,7 +85,7 @@ either a function or \type {nil}. 
\starttyping lua.bytecode[<number> n] = <function> f -lua.bytecode[<number> n]() +<function> f = lua.bytecode[<number> n] % -- f() \stoptyping The contents of the \type {lua.bytecode} array is stored inside the format file @@ -75,8 +93,8 @@ as actual \LUA\ bytecode, so it can also be used to preload \LUA\ code. The function must not contain any upvalues. The associated function calls are: \startfunctioncall -<function> f = lua.getbytecode(<number> n) lua.setbytecode(<number> n, <function> f) +<function> f = lua.getbytecode(<number> n) \stopfunctioncall Note: Since a \LUA\ file loaded using \type {loadfile(filename)} is essentially @@ -84,14 +102,14 @@ an anonymous function, a complete file can be stored in a bytecode register like this: \startfunctioncall -lua.bytecode[n] = loadfile(filename) +lua.setbytecode(n,loadfile(filename)) \stopfunctioncall Now all definitions (functions, variables) contained in the file can be created by executing this bytecode register: \startfunctioncall -lua.bytecode[n]() +lua.callbytecode(n) \stopfunctioncall Note that the path of the file is stored in the \LUA\ bytecode to be used in @@ -142,96 +160,87 @@ and values as a table. <table> info = status.list() \stopfunctioncall -The keys in the table are the known items, the value is the current value. Almost -all of the values in \type {status} are fetched through a metatable at run|-|time -whenever they are accessed, so you cannot use \type {pairs} on \type {status}, -but you {\it can\/} use \type {pairs} on \type {info}, of course. If you do not -need the full list, you can also ask for a single item by using its name as an -index into \type {status}. The current list is: +The keys in the table are the known items, the value is the current value. There are +toplevel items and items that are tables with subentries. 
The current list is: -{\em This table is under reconstrction!} +\startluacode + local list = status.list() + + context.starttabulate { "|Tw(10em)|Tp|" } + context.DB() + context("toplevel statistics") + context.BC() + context.NC() + context.NR() + context.TB() + for k, v in table.sortedhash(list) do + if type(v) ~= "table" then + context.NC() + context(k) + context.NC() + context(tostring(v)) + context.NC() + context.NR() + end + end + context.LL() + context.stoptabulate() + + for k, v in table.sortedhash(list) do + if type(v) == "table" then + context.starttabulate { "|Tw(10em)|Tp|" } + context.DB() + context(k ..".*") + context.BC() + context.NC() + context.NR() + context.TB() + for k, v in table.sortedhash(v) do + context.NC() + context(k) + context.NC() + context(v == "" and "unset" or tostring(v)) + context.NC() + context.NR() + end + context.LL() + context.stoptabulate() + end + end +\stopluacode -\starttabulate[|l|p|] -\DB key \BC explanation \NC \NR -\TB -\NC \type{banner} \NC terminal display banner \NC \NR -\NC \type{buf_size} \NC current allocated size of the line buffer \NC \NR -\NC \type{callbacks} \NC total number of executed callbacks so far \NC \NR -\NC \type{cs_count} \NC number of control sequences \NC \NR -\NC \type{dest_names_size} \NC \PDF\ destination table size \NC \NR -\NC \type{dvi_gone} \NC written \DVI\ bytes \NC \NR -\NC \type{dvi_ptr} \NC not yet written \DVI\ bytes \NC \NR -\NC \type{dyn_used} \NC token (multi|-|word) memory in use \NC \NR -\NC \type{filename} \NC name of the current input file \NC \NR -\NC \type{fix_mem_end} \NC maximum number of used tokens \NC \NR -\NC \type{fix_mem_min} \NC minimum number of allocated words for tokens \NC \NR -\NC \type{fix_mem_max} \NC maximum number of allocated words for tokens \NC \NR -\NC \type{font_ptr} \NC number of active fonts \NC \NR -\NC \type{hash_extra} \NC extra allowed hash \NC \NR -\NC \type{hash_size} \NC size of hash \NC \NR -\NC \type{indirect_callbacks} \NC number of those that were themselves a result of other callbacks (e.g. 
file readers) \NC \NR -\NC \type{ini_version} \NC \type {true} if this is an \INITEX\ run \NC \NR -\NC \type{init_pool_ptr} \NC \INITEX\ string pool index \NC \NR -\NC \type{init_str_ptr} \NC number of \INITEX\ strings \NC \NR -\NC \type{input_ptr} \NC the level of input we're at \NC \NR -\NC \type{inputid} \NC numeric id of the current input \NC \NR -\NC \type{largest_used_mark} \NC max referenced marks class \NC \NR -\NC \type{lasterrorcontext} \NC last error context string (with newlines) \NC \NR -\NC \type{lasterrorstring} \NC last \TEX\ error string \NC \NR -\NC \type{lastluaerrorstring} \NC last \LUA\ error string \NC \NR -\NC \type{lastwarningstring} \NC last warning tag, normally an indication of in what part\NC \NR -\NC \type{lastwarningtag} \NC last warning string\NC \NR -\NC \type{linenumber} \NC location in the current input file \NC \NR -\NC \type{log_name} \NC name of the log file \NC \NR -\NC \type{luabytecode_bytes} \NC number of bytes in \LUA\ bytecode registers \NC \NR -\NC \type{luabytecodes} \NC number of active \LUA\ bytecode registers \NC \NR -\NC \type{luastate_bytes} \NC number of bytes in use by \LUA\ interpreters \NC \NR -\NC \type{luatex_engine} \NC the \LUATEX\ engine identifier \NC \NR -\NC \type{luatex_hashchars} \NC length to which \LUA\ hashes strings ($2^n$) \NC \NR -\NC \type{luatex_hashtype} \NC the hash method used (in \LUAJITTEX) \NC \NR -\NC \type{luatex_version} \NC the \LUATEX\ version number \NC \NR -\NC \type{luatex_revision} \NC the \LUATEX\ revision string \NC \NR -\NC \type{max_buf_stack} \NC max used buffer position \NC \NR -\NC \type{max_in_stack} \NC max used input stack entries \NC \NR -\NC \type{max_nest_stack} \NC max used nesting stack entries \NC \NR -\NC \type{max_param_stack} \NC max used parameter stack entries \NC \NR -\NC \type{max_save_stack} \NC max used save stack entries \NC \NR -\NC \type{max_strings} \NC maximum allowed strings \NC \NR -\NC \type{nest_size} \NC nesting stack size \NC \NR -\NC \type{node_mem_usage} \NC a string giving insight into currently used nodes \NC \NR -\NC \type{obj_ptr} \NC max \PDF\ object pointer \NC \NR -\NC \type{obj_tab_size} \NC \PDF\ object table size \NC \NR -\NC \type{output_active} \NC \type {true} if the \prm {output} routine is active \NC \NR -\NC \type{output_file_name} \NC name of the \PDF\ or \DVI\ file \NC \NR -\NC \type{param_size} \NC parameter stack size \NC \NR -\NC \type{pdf_dest_names_ptr} \NC max \PDF\ destination pointer \NC \NR -\NC \type{pdf_gone} \NC written \PDF\ bytes \NC \NR -\NC \type{pdf_mem_ptr} \NC max \PDF\ memory used \NC \NR -\NC \type{pdf_mem_size} \NC \PDF\ memory size \NC \NR -\NC \type{pdf_os_cntr} \NC max \PDF\ object stream pointer \NC \NR -\NC \type{pdf_os_objidx} \NC \PDF\ object stream index \NC \NR -\NC \type{pdf_ptr} \NC not yet written \PDF\ bytes \NC \NR -\NC \type{pool_ptr} \NC string pool index \NC \NR -\NC \type{pool_size} \NC current size allocated for string characters \NC \NR -\NC \type{save_size} \NC save stack size \NC \NR -\NC \type{shell_escape} \NC \type {0} means disabled, \type {1} means anything is permitted, and \type {2} is restricted \NC \NR -\NC \type{safer_option} \NC \type {1} means safer is enforced \NC \NR -\NC \type{kpse_used} \NC \type {1} means that kpse is used \NC \NR -\NC \type{stack_size} \NC input stack size \NC \NR -\NC \type{str_ptr} \NC number of strings \NC \NR -\NC \type{total_pages} \NC number of written pages \NC \NR -\NC \type{var_mem_max} \NC number of allocated words for nodes \NC \NR -\NC \type{var_used} \NC 
variable (one|-|word) memory in use \NC \NR -\NC \type{lc_collate} \NC the value of \type {LC_COLLATE} at startup time (becomes \type {C} at startup) \NC \NR -\NC \type{lc_ctype} \NC the value of \type {LC_CTYPE} at startup time (becomes \type {C} at startup) \NC \NR -%NC \type{lc_monetary} \NC the value of \type {LC_MONETARY} at startup time \NC \NR -\NC \type{lc_numeric} \NC the value of \type {LC_NUMERIC} at startup time \NC \NR -%NC \type{lc_time} \NC the value of \type {LC_TIME} at startup time (becomes \type {C} at startup) \NC \NR -\LL -\stoptabulate +There are also getters for the subtables. The whole repertoire of functions in +the \type {status} table is: {\tttf \cldcontext {table . concat ( table . +sortedkeys (status), ", ")}}. The error and warning messages can be wiped with +the \type {resetmessages} function. The states in subtables relate to memory +management and are mostly there for development purposes. -The error and warning messages can be wiped with the \type {resetmessages} -function. A return value can be set with \type {setexitcode}. +The \type {getconstants} query gives back a table with all kind of internal +quantities and again these are only relevant for diagnostic and development +purposes. Many are good old \TEX\ constants that are describes in the original +documentation of the source but some are definitely \LUAMETATEX\ specific. + +\startluacode + context.starttabulate { "|Tw(15em)|Tp|" } + context.DB() + context("constants.*") + context.BC() + context.NC() + context.NR() + context.TB() + for k, v in table.sortedhash(status.getconstants()) do + if type(v) ~= "table" then + context.NC() + context(k) + context.NC() + context(tostring(v)) + context.NC() + context.NR() + end + end + context.LL() + context.stoptabulate() +\stopluacode \stopsection @@ -261,7 +270,8 @@ readable (like \prm {tracingcommands} and \prm {parindent}). \topicindex{parameters+internal} -\libindex{set} \libindex{get} +\libindex{set} +\libindex{get} For all the parameters in this section, it is possible to access them directly using their names as index in the \type {tex} table, or by using one of the @@ -284,156 +294,8 @@ get all five values. Otherwise you get a node which is a copy of the internal value so you are responsible for its freeing at the \LUA\ end. When you set a glue quantity you can either pass a \nod {glue_spec} or upto five numbers. -\subsubsection{Integer parameters} - -The integer parameters accept and return \LUA\ numbers. 
These are read|-|write: - -\starttwocolumns -\starttyping -tex.adjdemerits -tex.binoppenalty -tex.brokenpenalty -tex.catcodetable -tex.clubpenalty -tex.day -tex.defaulthyphenchar -tex.defaultskewchar -tex.delimiterfactor -tex.displaywidowpenalty -tex.doublehyphendemerits -tex.endlinechar -tex.errorcontextlines -tex.escapechar -tex.exhyphenpenalty -tex.fam -tex.finalhyphendemerits -tex.floatingpenalty -tex.globaldefs -tex.hangafter -tex.hbadness -tex.holdinginserts -tex.hyphenpenalty -tex.interlinepenalty -tex.language -tex.lastlinefit -tex.lefthyphenmin -tex.linepenalty -tex.localbrokenpenalty -tex.localinterlinepenalty -tex.looseness -tex.mag -tex.maxdeadcycles -tex.month -tex.newlinechar -tex.outputpenalty -tex.pausing -tex.postdisplaypenalty -tex.predisplaydirection -tex.predisplaypenalty -tex.pretolerance -tex.relpenalty -tex.righthyphenmin -tex.savinghyphcodes -tex.savingvdiscards -tex.showboxbreadth -tex.showboxdepth -tex.time -tex.tolerance -tex.tracingassigns -tex.tracingcommands -tex.tracinggroups -tex.tracingifs -tex.tracinglostchars -tex.tracingmacros -tex.tracingnesting -tex.tracingonline -tex.tracingoutput -tex.tracingpages -tex.tracingparagraphs -tex.tracingrestores -tex.tracingscantokens -tex.tracingstats -tex.uchyph -tex.vbadness -tex.widowpenalty -tex.year -\stoptyping -\stoptwocolumns - -These are read|-|only: - -\startthreecolumns -\starttyping -tex.deadcycles -tex.insertpenalties -tex.parshape -tex.interlinepenalties -tex.clubpenalties -tex.widowpenalties -tex.displaywidowpenalties -tex.prevgraf -tex.spacefactor -\stoptyping -\stopthreecolumns - -\subsubsection{Dimension parameters} - -The dimension parameters accept \LUA\ numbers (signifying scaled points) or -strings (with included dimension). The result is always a number in scaled -points. These are read|-|write: - -\startthreecolumns -\starttyping -tex.boxmaxdepth -tex.delimitershortfall -tex.displayindent -tex.displaywidth -tex.emergencystretch -tex.hangindent -tex.hfuzz -tex.hoffset -tex.hsize -tex.lineskiplimit -tex.mathsurround -tex.maxdepth -tex.nulldelimiterspace -tex.overfullrule -tex.pagebottomoffset -tex.pageheight -tex.pageleftoffset -tex.pagerightoffset -tex.pagetopoffset -tex.pagewidth -tex.parindent -tex.predisplaysize -tex.scriptspace -tex.splitmaxdepth -tex.vfuzz -tex.voffset -tex.vsize -tex.prevdepth -tex.prevgraf -tex.spacefactor -\stoptyping -\stopthreecolumns - -These are read|-|only: - -\startthreecolumns -\starttyping -tex.pagedepth -tex.pagefilllstretch -tex.pagefillstretch -tex.pagefilstretch -tex.pagegoal -tex.pageshrink -tex.pagestretch -tex.pagetotal -\stoptyping -\stopthreecolumns - -Beware: as with all \LUA\ tables you can add values to them. So, the following is -valid: +Beware: as with regular \LUA\ tables you can add values to the \type {tex} table. +So, the following is valid: \starttyping tex.foo = 123 @@ -465,70 +327,52 @@ as there is no way \LUATEX\ can guess your intentions. By using the accessor in the \type {tex} tables, you get and set the values at the top of the nesting stack. -\subsubsection{Direction parameters} +\subsubsection{Integer parameters} -The direction states can be queried and set with: +The integer parameters accept and return \LUA\ integers. In some cases the values +are checked, trigger other settings or result in some immediate change of +behaviour: \ctxlua {document.filteredprimitives ("internal_int")}. 
-\startthreecolumns -\starttyping -tex.gettextdir() -tex.getlinedir() -tex.getmathdir() -tex.getpardir() -tex.settextdir(<number>) -tex.setlinedir(<number>) -tex.setmathdir(<number>) -tex.setpardir(<number>) -\stoptyping -\stopthreecolumns +Some integer parameters are read only, because they are actually referring not to +some internal integer register but to an engine property: \typ {deadcycles}, +\typ {insertpenalties}, \typ {parshape}, \typ {interlinepenalties}, \typ +{clubpenalties}, \typ {widowpenalties}, \typ {displaywidowpenalties}, \typ +{prevgraf} and \typ {spacefactor}. -and also with: +\subsubsection{Dimension parameters} -\startthreecolumns -\starttyping -tex.textdirection -tex.linedirection -tex.mathdirection -tex.pardirection -\stoptyping -\stopthreecolumns +The dimension parameters accept \LUA\ numbers (signifying scaled points) or +strings (with included dimension). The result is always a number in scaled +points. These are read|-|write: \ctxlua {document.filteredprimitives +("internal_dimen")}. +These are read|-|only: \typ {pagedepth}, \typ {pagefilllstretch}, \typ +{pagefillstretch}, \typ {pagefilstretch}, \typ {pagegoal}, \typ {pageshrink}, +\typ {pagestretch} and \typ {pagetotal}. -\subsubsection{Glue parameters} +\subsubsection{Direction parameters} -The glue parameters accept and return a userdata object that represents a \nod {glue_spec} node. +The direction states can be queried with: \typ {gettextdir}, \typ {getlinedir}, +\typ {getmathdir} and \typ {getpardir}. You can set them with \typ +{settextdir}, \typ {setlinedir}, \typ {setmathdir} and \typ {setpardir}, +commands that accept a number. You can also set these parameters as table +key|/|values: \typ {textdirection}, \typ {linedirection}, \typ {mathdirection} +and \typ {pardirection}, so the next code sets the text direction to \typ +{r2l}: -\startthreecolumns \starttyping -tex.abovedisplayshortskip -tex.abovedisplayskip -tex.baselineskip -tex.belowdisplayshortskip -tex.belowdisplayskip -tex.leftskip -tex.lineskip -tex.parfillskip -tex.parskip -tex.rightskip -tex.spaceskip -tex.splittopskip -tex.tabskip -tex.topskip -tex.xspaceskip +tex.textdirection = 1 \stoptyping -\stopthreecolumns -\subsubsection{Muglue parameters} +\subsubsection{Glue parameters} -All muglue parameters are to be used read|-|only and return a \LUA\ string. +The internal glue parameters accept and return a userdata object that represents +a \nod {glue_spec} node: \ctxlua {document.filteredprimitives ("internal_glue")}. -\startthreecolumns -\starttyping -tex.medmuskip -tex.thickmuskip -tex.thinmuskip -\stoptyping -\stopthreecolumns +\subsubsection{Muglue parameters} + +All muglue parameters are to be used read|-|only and return a \LUA\ string +\ctxlua {document.filteredprimitives ("internal_mu_glue")}. \subsubsection{Tokenlist parameters} @@ -537,22 +381,8 @@ converted to and from token lists using \prm {the} \prm {toks} style expansion: all category codes are either space (10) or other (12). It follows that assigning to some of these, like \quote {tex.output}, is actually useless, but it feels bad to make exceptions in view of a coming extension that will accept full|-|blown -token strings. - -\startthreecolumns -\starttyping -tex.errhelp -tex.everycr -tex.everydisplay -tex.everyeof -tex.everyhbox -tex.everyjob -tex.everymath -tex.everypar -tex.everyvbox -tex.output -\stoptyping -\stopthreecolumns +token strings. Here is the lot: \ctxlua {document.filteredprimitives +("internal_toks")}. 
\stopsubsection @@ -561,55 +391,23 @@ tex.output \topicindex{convert commands} All \quote {convert} commands are read|-|only and return a \LUA\ string. The -supported commands at this moment are: - -\starttwocolumns -\starttyping -tex.formatname -tex.jobname -tex.luatexbanner -tex.luatexrevision -tex.fontname(number) -tex.uniformdeviate(number) -tex.number(number) -tex.romannumeral(number) -tex.fontidentifier(number) -\stoptyping -\stoptwocolumns - -If you are wondering why this list looks haphazard; these are all the cases of -the \quote {convert} internal command that do not require an argument, as well as -the ones that require only a simple numeric value. The special (\LUA|-|only) case -of \type {tex.fontidentifier} returns the \type {csname} string that matches a -font id number (if there is one). +supported commands at this moment are: \ctxlua {document.filteredprimitives +("convert")}. You will get an error message if an operation is not (yet) +permitted. Some take an string or number argument, just like at the \TEX\ end +some extra input is expected. \stopsubsection -\startsubsection[title={Last item commands}] +\startsubsection[title={Item commands}] \topicindex{last items} -All \quote {last item} commands are read|-|only and return a number. The -supported commands at this moment are: - -\startthreecolumns -\starttyping -tex.lastpenalty -tex.lastkern -tex.lastskip -tex.lastnodetype -tex.inputlineno -tex.lastxpos -tex.lastypos -tex.randomseed -tex.luatexversion -tex.currentgrouplevel -tex.currentgrouptype -tex.currentiflevel -tex.currentiftype -tex.currentifbranch -\stoptyping -\stopthreecolumns +All so called \quote {item} commands are read|-|only and return a number. The +complete list of these commands is: \ctxlua {document.filteredprimitives +("some_item")}. No all are currently supported but eventually that might be the +case. Like the lists in previous sections, there are differences between \LUATEX\ +and \LUAMETATEX, where some commands are organized differently in order to +provide a consistent \LUA\ interface. \stopsubsection @@ -1650,20 +1448,6 @@ given by the requested string value(s). The possible values and their (current) return values are given in the following table. In addition the somewhat special primitives \quote{\tex{ }}, \quote{\tex {/}} and \quote{\type {-}} are defined. 
-\startluacode -function document.showprimitives(tag) - local t = tex.extraprimitives(tag) - table.sort(t) - for i=1,#t do - local v = t[i] - if v ~= ' ' and v ~= "/" and v ~= "-" then - context.type(v) - context.space() - end - end -end -\stopluacode - \starttabulate[|l|pl|] \DB name \BC values \NC \NR \TB diff --git a/doc/context/sources/general/manuals/luametatex/luametatex.tex b/doc/context/sources/general/manuals/luametatex/luametatex.tex index b2de45ae2..63a50d000 100644 --- a/doc/context/sources/general/manuals/luametatex/luametatex.tex +++ b/doc/context/sources/general/manuals/luametatex/luametatex.tex @@ -115,6 +115,7 @@ \startbackmatter % \component luametatex-rejected % local file + \component luametatex-codes \component luametatex-registers \component luametatex-statistics \stopbackmatter diff --git a/metapost/context/base/mpii/mp-tool.mpii b/metapost/context/base/mpii/mp-tool.mpii index de8e1e8e2..27c90de38 100644 --- a/metapost/context/base/mpii/mp-tool.mpii +++ b/metapost/context/base/mpii/mp-tool.mpii @@ -1841,7 +1841,9 @@ enddef ; primarydef p enlonged len = begingroup - if pair p : + if len == 0 : + p + elseif pair p : save q ; path q ; q := origin -- p ; save al ; al := arclength(q) ; if al > 0 : @@ -1864,8 +1866,12 @@ enddef ; % drawarrow p withcolor red ; % drawarrow p shortened 1cm withcolor green ; +% primarydef p shortened d = +% reverse ( ( reverse (p enlonged -d) ) enlonged -d ) +% enddef ; + primarydef p shortened d = - reverse ( ( reverse (p enlonged -d) ) enlonged -d ) + reverse ( ( reverse (p enlonged -xpart paired(d)) ) enlonged -ypart paired(d) ) enddef ; % yes or no, untested -) diff --git a/metapost/context/base/mpiv/mp-tool.mpiv b/metapost/context/base/mpiv/mp-tool.mpiv index 5ace55d75..903438b72 100644 --- a/metapost/context/base/mpiv/mp-tool.mpiv +++ b/metapost/context/base/mpiv/mp-tool.mpiv @@ -2205,7 +2205,9 @@ enddef ; primarydef p enlonged len = begingroup - if pair p : + if len == 0 : + p + elseif pair p : save q ; path q ; q := origin -- p ; save al ; al := arclength(q) ; if al > 0 : @@ -2228,8 +2230,12 @@ enddef ; % drawarrow p withcolor red ; % drawarrow p shortened 1cm withcolor green ; +% primarydef p shortened d = +% reverse ( ( reverse (p enlonged -d) ) enlonged -d ) +% enddef ; + primarydef p shortened d = - reverse ( ( reverse (p enlonged -d) ) enlonged -d ) + reverse ( ( reverse (p enlonged -xpart paired(d)) ) enlonged -ypart paired(d) ) enddef ; % yes or no, untested -) diff --git a/scripts/context/lua/mtx-install.lua b/scripts/context/lua/mtx-install.lua index e0c51eccf..94c47e9db 100644 --- a/scripts/context/lua/mtx-install.lua +++ b/scripts/context/lua/mtx-install.lua @@ -120,8 +120,10 @@ local platforms = { ["macosx"] = "osx-64", ["osx"] = "osx-64", ["osx-64"] = "osx-64", - ["osx-arm"] = "osx-arm64", - ["osx-arm64"] = "osx-arm64", +-- ["osx-arm"] = "osx-arm64", +-- ["osx-arm64"] = "osx-arm64", + ["osx-arm"] = "osx-64", + ["osx-arm64"] = "osx-64", -- -- ["solaris-intel"] = "solaris-intel", -- diff --git a/tex/context/base/mkii/cont-new.mkii b/tex/context/base/mkii/cont-new.mkii index 02a4e9ee1..79a8f46d6 100644 --- a/tex/context/base/mkii/cont-new.mkii +++ b/tex/context/base/mkii/cont-new.mkii @@ -11,7 +11,7 @@ %C therefore copyrighted by \PRAGMA. See mreadme.pdf for %C details. 
-\newcontextversion{2020.12.09 10:48} +\newcontextversion{2020.12.10 22:23} %D This file is loaded at runtime, thereby providing an %D excellent place for hacks, patches, extensions and new diff --git a/tex/context/base/mkii/context.mkii b/tex/context/base/mkii/context.mkii index 64f3f8425..80a42b727 100644 --- a/tex/context/base/mkii/context.mkii +++ b/tex/context/base/mkii/context.mkii @@ -20,7 +20,7 @@ %D your styles an modules. \edef\contextformat {\jobname} -\edef\contextversion{2020.12.09 10:48} +\edef\contextversion{2020.12.10 22:23} %D For those who want to use this: diff --git a/tex/context/base/mkiv/cont-new.mkiv b/tex/context/base/mkiv/cont-new.mkiv index 47295c850..5b7f4831b 100644 --- a/tex/context/base/mkiv/cont-new.mkiv +++ b/tex/context/base/mkiv/cont-new.mkiv @@ -13,7 +13,7 @@ % \normalend % uncomment this to get the real base runtime -\newcontextversion{2020.12.09 10:48} +\newcontextversion{2020.12.10 22:23} %D This file is loaded at runtime, thereby providing an excellent place for hacks, %D patches, extensions and new features. There can be local overloads in cont-loc diff --git a/tex/context/base/mkiv/context.mkiv b/tex/context/base/mkiv/context.mkiv index 4ff38fa11..8b645e33a 100644 --- a/tex/context/base/mkiv/context.mkiv +++ b/tex/context/base/mkiv/context.mkiv @@ -45,7 +45,7 @@ %D {YYYY.MM.DD HH:MM} format. \edef\contextformat {\jobname} -\edef\contextversion{2020.12.09 10:48} +\edef\contextversion{2020.12.10 22:23} %D Kind of special: diff --git a/tex/context/base/mkiv/font-ots.lua b/tex/context/base/mkiv/font-ots.lua index 38044cac6..617c249b3 100644 --- a/tex/context/base/mkiv/font-ots.lua +++ b/tex/context/base/mkiv/font-ots.lua @@ -13,7 +13,7 @@ with plain <l n='tex'/> it has to be so. This module is part of <l n='context'/> and discussion about improvements and functionality mostly happens on the <l n='context'/> mailing list.</p> -<p>The specification of OpenType is (or at least a decade ago was) kind of vague. +<p>The specification of OpenType is (or at least decades ago was) kind of vague. Apart from a lack of a proper free specifications there's also the problem that Microsoft and Adobe may have their own interpretation of how and in what order to apply features. 
In general the Microsoft website has more detailed specifications diff --git a/tex/context/base/mkiv/node-ltp.lua b/tex/context/base/mkiv/node-ltp.lua index cf1d662a3..6cb9e8d10 100644 --- a/tex/context/base/mkiv/node-ltp.lua +++ b/tex/context/base/mkiv/node-ltp.lua @@ -286,7 +286,6 @@ local par_code = nodecodes.par local protrusionboundary_code = nodes.boundarycodes.protrusion local leaders_code = nodes.gluecodes.leaders local indentlist_code = nodes.listcodes.indent -local ligatureglyph_code = nodes.glyphcodes.ligature local cancel_code = nodes.dircodes.cancel local userkern_code = kerncodes.userkern diff --git a/tex/context/base/mkiv/status-files.pdf b/tex/context/base/mkiv/status-files.pdf Binary files differindex 8ba106da0..709b9330d 100644 --- a/tex/context/base/mkiv/status-files.pdf +++ b/tex/context/base/mkiv/status-files.pdf diff --git a/tex/context/base/mkiv/status-lua.pdf b/tex/context/base/mkiv/status-lua.pdf Binary files differindex 92073e370..37f1aff8a 100644 --- a/tex/context/base/mkiv/status-lua.pdf +++ b/tex/context/base/mkiv/status-lua.pdf diff --git a/tex/context/base/mkiv/symb-ini.lua b/tex/context/base/mkiv/symb-ini.lua index 588e5d2ab..c0e717f46 100644 --- a/tex/context/base/mkiv/symb-ini.lua +++ b/tex/context/base/mkiv/symb-ini.lua @@ -21,7 +21,6 @@ local report_symbols = logs.reporter ("fonts","symbols") local status_symbols = logs.messenger("fonts","symbols") local patterns = { - CONTEXTLMTXMODE > 0 and "symb-imp-%s.mkxl" or "", "symb-imp-%s.mkiv", "symb-imp-%s.tex", -- obsolete: diff --git a/tex/context/base/mkxl/cont-new.mkxl b/tex/context/base/mkxl/cont-new.mkxl index 187b14f89..298dbc016 100644 --- a/tex/context/base/mkxl/cont-new.mkxl +++ b/tex/context/base/mkxl/cont-new.mkxl @@ -13,7 +13,7 @@ % \normalend % uncomment this to get the real base runtime -\newcontextversion{2020.12.09 10:48} +\newcontextversion{2020.12.10 22:23} %D This file is loaded at runtime, thereby providing an excellent place for hacks, %D patches, extensions and new features. There can be local overloads in cont-loc diff --git a/tex/context/base/mkxl/context.mkxl b/tex/context/base/mkxl/context.mkxl index 959f6951d..238271819 100644 --- a/tex/context/base/mkxl/context.mkxl +++ b/tex/context/base/mkxl/context.mkxl @@ -29,7 +29,7 @@ %D {YYYY.MM.DD HH:MM} format. 
\immutable\edef\contextformat {\jobname} -\immutable\edef\contextversion{2020.12.09 10:48} +\immutable\edef\contextversion{2020.12.10 22:23} %overloadmode 1 % check frozen / warning %overloadmode 2 % check frozen / error diff --git a/tex/context/base/mkxl/font-lib.mklx b/tex/context/base/mkxl/font-lib.mklx index d187d3a05..b6daca3c7 100644 --- a/tex/context/base/mkxl/font-lib.mklx +++ b/tex/context/base/mkxl/font-lib.mklx @@ -40,10 +40,10 @@ \registerctxluafile{font-ott}{} % otf tables (first) \registerctxluafile{font-otl}{} \registerctxluafile{font-oto}{} -\registerctxluafile{font-otj}{optimize} +\registerctxluafile{font-otj}{autosuffix,optimize} \registerctxluafile{font-oup}{} \registerctxluafile{font-ota}{autosuffix} -\registerctxluafile{font-ots}{optimize} +\registerctxluafile{font-ots}{autosuffix,optimize} \registerctxluafile{font-otd}{optimize} \registerctxluafile{font-otc}{} \registerctxluafile{font-oth}{} diff --git a/tex/context/base/mkxl/font-ota.lmt b/tex/context/base/mkxl/font-ota.lmt index 8f6c059ef..6475c57e6 100644 --- a/tex/context/base/mkxl/font-ota.lmt +++ b/tex/context/base/mkxl/font-ota.lmt @@ -39,6 +39,8 @@ local setprop = nuts.setprop local getsubtype = nuts.getsubtype local getchar = nuts.getchar local ischar = nuts.ischar +local isnextchar = nuts.isnextchar +----- isprevchar = nuts.isprevchar local end_of_math = nuts.end_of_math @@ -178,7 +180,8 @@ function analyzers.setstate(head,font) -- latin local done = false -- only letters while current do - local char, id = ischar(current,font) + -- local char, id = ischar(current,font) + local nxt, char, id = isnextchar(current,font) if char then if not getstate(current) then -- local d = descriptions[char] @@ -213,7 +216,8 @@ function analyzers.setstate(head,font) -- latin end first = false if id == math_code then - current = end_of_math(current) + -- current = end_of_math(current) + nxt = getnext(end_of_math(current)) end elseif id == disc_code then -- always in the middle .. 
it doesn't make much sense to assign a property @@ -231,7 +235,8 @@ function analyzers.setstate(head,font) -- latin end end ::NEXT:: - current = getnext(current) + -- current = getnext(current) + current = nxt end if first then setstate(last,first == last and s_isol or s_fina) @@ -311,7 +316,8 @@ function methods.arab(head,font,attr) local done = false current = tonut(current) while current do - local char, id = ischar(current,font) + -- local char, id = ischar(current,font) + local nxt, char, id = isnextchar(current,font) if char and not getstate(current) then done = true local classifier = classifiers[char] @@ -424,10 +430,12 @@ function methods.arab(head,font,attr) first = nil end if id == math_code then -- a bit duplicate as we test for glyphs twice - current = end_of_math(current) + -- current = end_of_math(current) + nxt = getnext(end_of_math(current)) end end - current = getnext(current) + -- current = getnext(current) + current = nxt end if last then if c_last == s_medi or c_last == s_fina then @@ -498,7 +506,8 @@ do end while current do - local char, id = ischar(current,font) + -- local char, id = ischar(current,font) + local nxt, char, id = isnextchar(current,font) if char and not getstate(current) then local currjoin = joining[char] done = true @@ -551,10 +560,12 @@ do wrapup() end if id == math_code then -- a bit duplicate as we test for glyphs twice - current = end_of_math(current) + -- current = end_of_math(current) + nxt = getnext(end_of_math(current)) end end - current = getnext(current) + -- current = getnext(current) + current = nxt end if last then wrapup() diff --git a/tex/context/base/mkxl/font-otj.lmt b/tex/context/base/mkxl/font-otj.lmt new file mode 100644 index 000000000..7661695b5 --- /dev/null +++ b/tex/context/base/mkxl/font-otj.lmt @@ -0,0 +1,1800 @@ +if not modules then modules = { } end modules ['font-otj'] = { + version = 1.001, + optimize = true, + comment = "companion to font-lib.mkiv", + author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", + copyright = "PRAGMA ADE / ConTeXt Development Team", + license = "see context related readme files", +} + +-- This property based variant is not faster but looks nicer than the attribute one. We +-- need to use rawget (which is about 4 times slower than a direct access but we cannot +-- get/set that one for our purpose! This version does a bit more with discretionaries +-- (and Kai has tested it with his collection of weird fonts.) + +-- There is some duplicate code here (especially in the the pre/post/replace branches) but +-- we go for speed. We could store a list of glyph and mark nodes when registering but it's +-- cleaner to have an identification pass here. Also, I need to keep tracing in mind so +-- being too clever here is dangerous. + +-- As we have a rawget on properties we don't need one on injections. + +-- The use_advance code was just a test and is meant for testing and manuals. There is no +-- performance (or whatever) gain and using kerns is somewhat cleaner (at least for now). + +-- An alternative is to have a list per base of all marks and then do a run over the node +-- list that resolves the accumulated l/r/x/y and then do an inject pass. + +-- if needed we can flag a kern node as immutable + +-- The thing with these positioning options is that it is not clear what Uniscribe does with +-- the 2rl flag and we keep oscillating a between experiments. 
+ +if not nodes.properties then return end + +local next, rawget, tonumber = next, rawget, tonumber +local fastcopy = table.fastcopy + +local registertracker = trackers.register +local registerdirective = directives.register + +local trace_injections = false registertracker("fonts.injections", function(v) trace_injections = v end) +local trace_marks = false registertracker("fonts.injections.marks", function(v) trace_marks = v end) +local trace_cursive = false registertracker("fonts.injections.cursive", function(v) trace_cursive = v end) +local trace_spaces = false registertracker("fonts.injections.spaces", function(v) trace_spaces = v end) + +-- local fix_cursive_marks = false +-- +-- registerdirective("fonts.injections.fixcursivemarks", function(v) +-- fix_cursive_marks = v +-- end) + +local report_injections = logs.reporter("fonts","injections") +local report_spaces = logs.reporter("fonts","spaces") + +local attributes, nodes, node = attributes, nodes, node + +fonts = fonts +local hashes = fonts.hashes +local fontdata = hashes.identifiers +local fontmarks = hashes.marks +----- parameters = fonts.hashes.parameters -- not in generic +----- resources = fonts.hashes.resources -- not in generic + +nodes.injections = nodes.injections or { } +local injections = nodes.injections + +local tracers = nodes.tracers +local setcolor = tracers and tracers.colors.set +local resetcolor = tracers and tracers.colors.reset + +local nodecodes = nodes.nodecodes +local glyph_code = nodecodes.glyph +local disc_code = nodecodes.disc +local kern_code = nodecodes.kern +local glue_code = nodecodes.glue + +local nuts = nodes.nuts +local nodepool = nuts.pool + +local tonode = nuts.tonode +local tonut = nuts.tonut + +local setfield = nuts.setfield +local getnext = nuts.getnext +local getprev = nuts.getprev +local getid = nuts.getid +local getfont = nuts.getfont +local getchar = nuts.getchar +local getoffsets = nuts.getoffsets +local getboth = nuts.getboth +local getdisc = nuts.getdisc +local setdisc = nuts.setdisc +local setoffsets = nuts.setoffsets +local ischar = nuts.ischar +local isnextchar = nuts.isnextchar +----- isprevchar = nuts.isprevchar +local getkern = nuts.getkern +local setkern = nuts.setkern +local setlink = nuts.setlink +local setwidth = nuts.setwidth +local getwidth = nuts.getwidth + +----- traverse_id = nuts.traverse_id +----- traverse_char = nuts.traverse_char +local nextchar = nuts.traversers.char +local nextglue = nuts.traversers.glue + +local insert_node_before = nuts.insert_before +local insert_node_after = nuts.insert_after + +local properties = nodes.properties.data + +local fontkern = nuts.pool and nuts.pool.fontkern -- context +local italickern = nuts.pool and nuts.pool.italickern -- context + +local useitalickerns = false -- context only + +directives.register("fonts.injections.useitalics", function(v) + if v then + report_injections("using italics for space kerns (tracing only)") + end + useitalickerns = v +end) + +if not fontkern then -- generic + + local thekern = nuts.new("kern",0) -- fontkern + local setkern = nuts.setkern + local copy_node = nuts.copy_node + + fontkern = function(k) + local n = copy_node(thekern) + setkern(n,k) + return n + end + +end + +if not italickern then -- generic + + local thekern = nuts.new("kern",3) -- italiccorrection + local setkern = nuts.setkern + local copy_node = nuts.copy_node + + italickern = function(k) + local n = copy_node(thekern) + setkern(n,k) + return n + end + +end + +function injections.installnewkern() end -- obsolete + +local 
nofregisteredkerns = 0 +local nofregisteredpositions = 0 +local nofregisteredmarks = 0 +local nofregisteredcursives = 0 +local keepregisteredcounts = false + +function injections.keepcounts() + keepregisteredcounts = true +end + +function injections.resetcounts() + nofregisteredkerns = 0 + nofregisteredpositions = 0 + nofregisteredmarks = 0 + nofregisteredcursives = 0 + keepregisteredcounts = false +end + +-- We need to make sure that a possible metatable will not kick in unexpectedly. + +function injections.reset(n) + local p = rawget(properties,n) + if p then + p.injections = false -- { } -- nil should work too as we use rawget + else + properties[n] = false -- { injections = { } } -- nil should work too as we use rawget + end +end + +function injections.copy(target,source) + local sp = rawget(properties,source) + if sp then + local tp = rawget(properties,target) + local si = sp.injections + if si then + si = fastcopy(si) + if tp then + tp.injections = si + else + properties[target] = { + injections = si, + } + end + elseif tp then + tp.injections = false -- { } + else + properties[target] = { injections = { } } + end + else + local tp = rawget(properties,target) + if tp then + tp.injections = false -- { } + else + properties[target] = false -- { injections = { } } + end + end +end + +function injections.setligaindex(n,index) -- todo: don't set when 0 + local p = rawget(properties,n) + if p then + local i = p.injections + if i then + i.ligaindex = index + else + p.injections = { + ligaindex = index + } + end + else + properties[n] = { + injections = { + ligaindex = index + } + } + end +end + +function injections.getligaindex(n,default) + local p = rawget(properties,n) + if p then + local i = p.injections + if i then + return i.ligaindex or default + end + end + return default +end + +function injections.setcursive(start,nxt,factor,rlmode,exit,entry,tfmstart,tfmnext,r2lflag) + + -- The standard says something about the r2lflag related to the first in a series: + -- + -- When this bit is set, the last glyph in a given sequence to which the cursive + -- attachment lookup is applied, will be positioned on the baseline. + -- + -- But it looks like we don't need to consider it. + + local dx = factor*(exit[1]-entry[1]) + local dy = -factor*(exit[2]-entry[2]) + local ws = tfmstart.width + local wn = tfmnext.width + nofregisteredcursives = nofregisteredcursives + 1 + if rlmode < 0 then + dx = -(dx + wn) + else + dx = dx - ws + end + if dx == 0 then + -- get rid of funny -0 + dx = 0 + end + -- + local p = rawget(properties,start) + if p then + local i = p.injections + if i then + i.cursiveanchor = true + else + p.injections = { + cursiveanchor = true, + } + end + else + properties[start] = { + injections = { + cursiveanchor = true, + }, + } + end + local p = rawget(properties,nxt) + if p then + local i = p.injections + if i then + i.cursivex = dx + i.cursivey = dy + else + p.injections = { + cursivex = dx, + cursivey = dy, + } + end + else + properties[nxt] = { + injections = { + cursivex = dx, + cursivey = dy, + }, + } + end + return dx, dy, nofregisteredcursives +end + +-- kind: 0=single 1=first of pair, 2=second of pair + +function injections.setposition(kind,current,factor,rlmode,spec,injection) + local x = factor * (spec[1] or 0) + local y = factor * (spec[2] or 0) + local w = factor * (spec[3] or 0) + local h = factor * (spec[4] or 0) + if x ~= 0 or w ~= 0 or y ~= 0 or h ~= 0 then -- okay? 
+ local yoffset = y - h + local leftkern = x -- both kerns are set in a pair kern compared + local rightkern = w - x -- to normal kerns where we set only leftkern + if leftkern ~= 0 or rightkern ~= 0 or yoffset ~= 0 then + nofregisteredpositions = nofregisteredpositions + 1 + if rlmode and rlmode < 0 then + leftkern, rightkern = rightkern, leftkern + end + if not injection then + injection = "injections" + end + local p = rawget(properties,current) + if p then + local i = p[injection] + if i then + if leftkern ~= 0 then + i.leftkern = (i.leftkern or 0) + leftkern + end + if rightkern ~= 0 then + i.rightkern = (i.rightkern or 0) + rightkern + end + if yoffset ~= 0 then + i.yoffset = (i.yoffset or 0) + yoffset + end + elseif leftkern ~= 0 or rightkern ~= 0 then + p[injection] = { + leftkern = leftkern, + rightkern = rightkern, + yoffset = yoffset, + } + else + p[injection] = { + yoffset = yoffset, + } + end + elseif leftkern ~= 0 or rightkern ~= 0 then + properties[current] = { + [injection] = { + leftkern = leftkern, + rightkern = rightkern, + yoffset = yoffset, + }, + } + else + properties[current] = { + [injection] = { + yoffset = yoffset, + }, + } + end + return x, y, w, h, nofregisteredpositions + end + end + return x, y, w, h -- no bound +end + +-- The next one is used for simple kerns coming from a truetype kern table. The r2l +-- variant variant needs checking but it is unlikely that a r2l script uses thsi +-- feature. + +function injections.setkern(current,factor,rlmode,x,injection) + local dx = factor * x + if dx ~= 0 then + nofregisteredkerns = nofregisteredkerns + 1 + local p = rawget(properties,current) + if not injection then + injection = "injections" + end + if p then + local i = p[injection] + if i then + i.leftkern = dx + (i.leftkern or 0) + else + p[injection] = { + leftkern = dx, + } + end + else + properties[current] = { + [injection] = { + leftkern = dx, + }, + } + end + return dx, nofregisteredkerns + else + return 0, 0 + end +end + +-- This one is an optimization of pairs where we have only a "w" entry. This one is +-- potentially different from the previous one wrt r2l. It needs checking. The +-- optimization relates to smaller tma files. 
+ +function injections.setmove(current,factor,rlmode,x,injection) + local dx = factor * x + if dx ~= 0 then + nofregisteredkerns = nofregisteredkerns + 1 + local p = rawget(properties,current) + if not injection then + injection = "injections" + end + if rlmode and rlmode < 0 then + -- we need to swap with a single so then we also need to to it here + -- as move is just a simple single + if p then + local i = p[injection] + if i then + i.rightkern = dx + (i.rightkern or 0) + else + p[injection] = { + rightkern = dx, + } + end + else + properties[current] = { + [injection] = { + rightkern = dx, + }, + } + end + else + if p then + local i = p[injection] + if i then + i.leftkern = dx + (i.leftkern or 0) + else + p[injection] = { + leftkern = dx, + } + end + else + properties[current] = { + [injection] = { + leftkern = dx, + }, + } + end + end + return dx, nofregisteredkerns + else + return 0, 0 + end +end + +function injections.setmark(start,base,factor,rlmode,ba,ma,tfmbase,mkmk,checkmark) -- ba=baseanchor, ma=markanchor + local dx = factor*(ba[1]-ma[1]) + local dy = factor*(ba[2]-ma[2]) + nofregisteredmarks = nofregisteredmarks + 1 + if rlmode >= 0 then + dx = tfmbase.width - dx -- see later commented ox + end + local p = rawget(properties,start) + -- hm, dejavu serif does a sloppy mark2mark before mark2base + if p then + local i = p.injections + if i then + if i.markmark then + -- out of order mkmk: yes or no or option + else + -- if dx ~= 0 then + -- i.markx = dx + -- end + -- if y ~= 0 then + -- i.marky = dy + -- end + -- if rlmode then + -- i.markdir = rlmode + -- end + i.markx = dx + i.marky = dy + i.markdir = rlmode or 0 + i.markbase = nofregisteredmarks + i.markbasenode = base + i.markmark = mkmk + i.checkmark = checkmark + end + else + p.injections = { + markx = dx, + marky = dy, + markdir = rlmode or 0, + markbase = nofregisteredmarks, + markbasenode = base, + markmark = mkmk, + checkmark = checkmark, + } + end + else + properties[start] = { + injections = { + markx = dx, + marky = dy, + markdir = rlmode or 0, + markbase = nofregisteredmarks, + markbasenode = base, + markmark = mkmk, + checkmark = checkmark, + }, + } + end + return dx, dy, nofregisteredmarks +end + +local function dir(n) + return (n and n<0 and "r-to-l") or (n and n>0 and "l-to-r") or "unset" +end + +local function showchar(n,nested) + local char = getchar(n) + report_injections("%wfont %s, char %U, glyph %c",nested and 2 or 0,getfont(n),char,char) +end + +local function show(n,what,nested,symbol) + if n then + local p = rawget(properties,n) + if p then + local i = p[what] + if i then + local leftkern = i.leftkern or 0 + local rightkern = i.rightkern or 0 + local yoffset = i.yoffset or 0 + local markx = i.markx or 0 + local marky = i.marky or 0 + local markdir = i.markdir or 0 + local markbase = i.markbase or 0 + local cursivex = i.cursivex or 0 + local cursivey = i.cursivey or 0 + local ligaindex = i.ligaindex or 0 + local cursbase = i.cursiveanchor + local margin = nested and 4 or 2 + -- + if rightkern ~= 0 or yoffset ~= 0 then + report_injections("%w%s pair: lx %p, rx %p, dy %p",margin,symbol,leftkern,rightkern,yoffset) + elseif leftkern ~= 0 then + report_injections("%w%s kern: dx %p",margin,symbol,leftkern) + end + if markx ~= 0 or marky ~= 0 or markbase ~= 0 then + report_injections("%w%s mark: dx %p, dy %p, dir %s, base %s",margin,symbol,markx,marky,markdir,markbase ~= 0 and "yes" or "no") + end + if cursivex ~= 0 or cursivey ~= 0 then + if cursbase then + report_injections("%w%s curs: base dx %p, dy 
%p",margin,symbol,cursivex,cursivey) + else + report_injections("%w%s curs: dx %p, dy %p",margin,symbol,cursivex,cursivey) + end + elseif cursbase then + report_injections("%w%s curs: base",margin,symbol) + end + if ligaindex ~= 0 then + report_injections("%w%s liga: index %i",margin,symbol,ligaindex) + end + end + end + end +end + +local function showsub(n,what,where) + report_injections("begin subrun: %s",where) + for n in nextchar, n do + showchar(n,where) + show(n,what,where," ") + end + report_injections("end subrun") +end + +local function trace(head,where) + report_injections() + report_injections("begin run %s: %s kerns, %s positions, %s marks and %s cursives registered", + where or "",nofregisteredkerns,nofregisteredpositions,nofregisteredmarks,nofregisteredcursives) + local n = head + while n do + local id = getid(n) + if id == glyph_code then + showchar(n) + show(n,"injections",false," ") + show(n,"preinjections",false,"<") + show(n,"postinjections",false,">") + show(n,"replaceinjections",false,"=") + show(n,"emptyinjections",false,"*") + elseif id == disc_code then + local pre, post, replace = getdisc(n) + if pre then + showsub(pre,"preinjections","pre") + end + if post then + showsub(post,"postinjections","post") + end + if replace then + showsub(replace,"replaceinjections","replace") + end + show(n,"emptyinjections",false,"*") + end + n = getnext(n) + end + report_injections("end run") +end + +local function show_result(head) + local current = head + local skipping = false + while current do + local id = getid(current) + if id == glyph_code then + local w = getwidth(current) + local x, y = getoffsets(current) + report_injections("char: %C, width %p, xoffset %p, yoffset %p",getchar(current),w,x,y) + skipping = false + elseif id == kern_code then + report_injections("kern: %p",getkern(current)) + skipping = false + elseif not skipping then + report_injections() + skipping = true + end + current = getnext(current) + end + report_injections() +end + +-- G +D-pre G +-- D-post+ +-- +D-replace+ +-- +-- G +D-pre +D-pre +-- D-post +D-post +-- +D-replace +D-replace + +local function inject_kerns_only(head,where) + if trace_injections then + trace(head,"kerns") + end + local current = head + local prev = nil + local next = nil + local prevdisc = nil + -- local prevglyph = nil + local pre = nil -- saves a lookup + local post = nil -- saves a lookup + local replace = nil -- saves a lookup + local pretail = nil -- saves a lookup + local posttail = nil -- saves a lookup + local replacetail = nil -- saves a lookup + while current do + -- local next = getnext(current) + -- local char, id = ischar(current) + local next, char, id = isnextchar(current) + if char then + local p = rawget(properties,current) + if p then + local i = p.injections + if i then + -- left|glyph|right + local leftkern = i.leftkern + if leftkern and leftkern ~= 0 then + if prev and getid(prev) == glue_code then + if useitalickerns then + head = insert_node_before(head,current,italickern(leftkern)) + else + setwidth(prev, getwidth(prev) + leftkern) + end + else + head = insert_node_before(head,current,fontkern(leftkern)) + end + end + end + if prevdisc then + local done = false + if post then + local i = p.postinjections + if i then + local leftkern = i.leftkern + if leftkern and leftkern ~= 0 then + setlink(posttail,fontkern(leftkern)) + done = true + end + end + end + if replace then + local i = p.replaceinjections + if i then + local leftkern = i.leftkern + if leftkern and leftkern ~= 0 then + 
setlink(replacetail,fontkern(leftkern)) + done = true + end + end + else + local i = p.emptyinjections + if i then + -- glyph|disc|glyph (special case) + local leftkern = i.leftkern + if leftkern and leftkern ~= 0 then + replace = fontkern(leftkern) + done = true + end + end + end + if done then + setdisc(prevdisc,pre,post,replace) + end + end + end + prevdisc = nil + -- prevglyph = current + elseif char == false then + -- other font + prevdisc = nil + -- prevglyph = current + elseif id == disc_code then + pre, post, replace, pretail, posttail, replacetail = getdisc(current,true) + local done = false + if pre then + -- left|pre glyphs|right + for n in nextchar, pre do + local p = rawget(properties,n) + if p then + local i = p.injections or p.preinjections + if i then + local leftkern = i.leftkern + if leftkern and leftkern ~= 0 then + pre = insert_node_before(pre,n,fontkern(leftkern)) + done = true + end + end + end + end + end + if post then + -- left|post glyphs|right + for n in nextchar, post do + local p = rawget(properties,n) + if p then + local i = p.injections or p.postinjections + if i then + local leftkern = i.leftkern + if leftkern and leftkern ~= 0 then + post = insert_node_before(post,n,fontkern(leftkern)) + done = true + end + end + end + end + end + if replace then + -- left|replace glyphs|right + for n in nextchar, replace do + local p = rawget(properties,n) + if p then + local i = p.injections or p.replaceinjections + if i then + local leftkern = i.leftkern + if leftkern and leftkern ~= 0 then + replace = insert_node_before(replace,n,fontkern(leftkern)) + done = true + end + end + end + end + end + if done then + setdisc(current,pre,post,replace) + end + -- prevglyph = nil + prevdisc = current + else + -- prevglyph = nil + prevdisc = nil + end + prev = current + current = next + end + -- + if keepregisteredcounts then + keepregisteredcounts = false + else + nofregisteredkerns = 0 + end + if trace_injections then + show_result(head) + end + return head +end + +local function inject_positions_only(head,where) + if trace_injections then + trace(head,"positions") + end + local current = head + local prev = nil + local next = nil + local prevdisc = nil + local prevglyph = nil + local pre = nil -- saves a lookup + local post = nil -- saves a lookup + local replace = nil -- saves a lookup + local pretail = nil -- saves a lookup + local posttail = nil -- saves a lookup + local replacetail = nil -- saves a lookup + while current do + -- local next = getnext(current) + -- local char, id = ischar(current) + local next, char, id = isnextchar(current) + if char then + local p = rawget(properties,current) + if p then + local i = p.injections + if i then + -- left|glyph|right + local yoffset = i.yoffset + if yoffset and yoffset ~= 0 then + setoffsets(current,false,yoffset) + end + local leftkern = i.leftkern + local rightkern = i.rightkern + if leftkern and leftkern ~= 0 then + if rightkern and leftkern == -rightkern then + setoffsets(current,leftkern,false) + rightkern = 0 + elseif prev and getid(prev) == glue_code then + if useitalickerns then + head = insert_node_before(head,current,italickern(leftkern)) + else + setwidth(prev, getwidth(prev) + leftkern) + end + else + head = insert_node_before(head,current,fontkern(leftkern)) + end + end + if rightkern and rightkern ~= 0 then + if next and getid(next) == glue_code then + if useitalickerns then + insert_node_after(head,current,italickern(rightkern)) + else + setwidth(next, getwidth(next) + rightkern) + end + else + 
insert_node_after(head,current,fontkern(rightkern)) + end + end + else + local i = p.emptyinjections + if i then + -- glyph|disc|glyph (special case) + local rightkern = i.rightkern + if rightkern and rightkern ~= 0 then + if next and getid(next) == disc_code then + if replace then + -- error, we expect an empty one + else + -- KE setfield(next,"replace",fontkern(rightkern)) -- maybe also leftkern + replace = fontkern(rightkern) -- maybe also leftkern + done = true --KE + end + end + end + end + end + if prevdisc then + local done = false + if post then + local i = p.postinjections + if i then + local leftkern = i.leftkern + if leftkern and leftkern ~= 0 then + setlink(posttail,fontkern(leftkern)) + done = true + end + end + end + if replace then + local i = p.replaceinjections + if i then + local leftkern = i.leftkern + if leftkern and leftkern ~= 0 then + setlink(replacetail,fontkern(leftkern)) + done = true + end + end + else + local i = p.emptyinjections + if i then + -- new .. okay? + local leftkern = i.leftkern + if leftkern and leftkern ~= 0 then + replace = fontkern(leftkern) + done = true + end + end + end + if done then + setdisc(prevdisc,pre,post,replace) + end + end + end + prevdisc = nil + prevglyph = current + elseif char == false then + prevdisc = nil + prevglyph = current + elseif id == disc_code then + pre, post, replace, pretail, posttail, replacetail = getdisc(current,true) + local done = false + if pre then + -- left|pre glyphs|right + for n in nextchar, pre do + local p = rawget(properties,n) + if p then + local i = p.injections or p.preinjections + if i then + local yoffset = i.yoffset + if yoffset and yoffset ~= 0 then + setoffsets(n,false,yoffset) + end + local leftkern = i.leftkern + if leftkern and leftkern ~= 0 then + pre = insert_node_before(pre,n,fontkern(leftkern)) + done = true + end + local rightkern = i.rightkern + if rightkern and rightkern ~= 0 then + insert_node_after(pre,n,fontkern(rightkern)) + done = true + end + end + end + end + end + if post then + -- left|post glyphs|right + for n in nextchar, post do + local p = rawget(properties,n) + if p then + local i = p.injections or p.postinjections + if i then + local yoffset = i.yoffset + if yoffset and yoffset ~= 0 then + setoffsets(n,false,yoffset) + end + local leftkern = i.leftkern + if leftkern and leftkern ~= 0 then + post = insert_node_before(post,n,fontkern(leftkern)) + done = true + end + local rightkern = i.rightkern + if rightkern and rightkern ~= 0 then + insert_node_after(post,n,fontkern(rightkern)) + done = true + end + end + end + end + end + if replace then + -- left|replace glyphs|right + for n in nextchar, replace do + local p = rawget(properties,n) + if p then + local i = p.injections or p.replaceinjections + if i then + local yoffset = i.yoffset + if yoffset and yoffset ~= 0 then + setoffsets(n,false,yoffset) + end + local leftkern = i.leftkern + if leftkern and leftkern ~= 0 then + replace = insert_node_before(replace,n,fontkern(leftkern)) + done = true + end + local rightkern = i.rightkern + if rightkern and rightkern ~= 0 then + insert_node_after(replace,n,fontkern(rightkern)) + done = true + end + end + end + end + end + if prevglyph then + if pre then + local p = rawget(properties,prevglyph) + if p then + local i = p.preinjections + if i then + -- glyph|pre glyphs + local rightkern = i.rightkern + if rightkern and rightkern ~= 0 then + pre = insert_node_before(pre,pre,fontkern(rightkern)) + done = true + end + end + end + end + if replace then + local p = 
rawget(properties,prevglyph) + if p then + local i = p.replaceinjections + if i then + -- glyph|replace glyphs + local rightkern = i.rightkern + if rightkern and rightkern ~= 0 then + replace = insert_node_before(replace,replace,fontkern(rightkern)) + done = true + end + end + end + end + end + if done then + setdisc(current,pre,post,replace) + end + prevglyph = nil + prevdisc = current + else + prevglyph = nil + prevdisc = nil + end + prev = current + current = next + end + -- + if keepregisteredcounts then + keepregisteredcounts = false + else + nofregisteredpositions = 0 + end + if trace_injections then + show_result(head) + end + return head +end + +local function showoffset(n,flag) + local x, y = getoffsets(n) + if x ~= 0 or y ~= 0 then + setcolor(n,"darkgray") + end +end + +local function inject_everything(head,where) + if trace_injections then + trace(head,"everything") + end + local hascursives = nofregisteredcursives > 0 + local hasmarks = nofregisteredmarks > 0 + -- + local current = head + local last = nil + local prev = nil + local next = nil + local prevdisc = nil + local prevglyph = nil + local pre = nil -- saves a lookup + local post = nil -- saves a lookup + local replace = nil -- saves a lookup + local pretail = nil -- saves a lookup + local posttail = nil -- saves a lookup + local replacetail = nil -- saves a lookup + -- + local cursiveanchor = nil + local minc = 0 + local maxc = 0 + local glyphs = { } + local marks = { } + local nofmarks = 0 + -- + -- local applyfix = hascursives and fix_cursive_marks + -- + -- move out + -- + local function processmark(p,n,pn) -- p = basenode + local px, py = getoffsets(p) + local nx, ny = getoffsets(n) + local ox = 0 + local rightkern = nil + local pp = rawget(properties,p) + if pp then + pp = pp.injections + if pp then + rightkern = pp.rightkern + end + end + local markdir = pn.markdir + if rightkern then -- x and w ~= 0 + ox = px - (pn.markx or 0) - rightkern + if markdir and markdir < 0 then + -- kern(w-x) glyph(p) kern(x) mark(n) + if not pn.markmark then + ox = ox + (pn.leftkern or 0) + end + else + -- kern(x) glyph(p) kern(w-x) mark(n) + -- + -- According to Kai we don't need to handle leftkern here but I'm + -- pretty sure I've run into a case where it was needed so maybe + -- some day we need something more clever here. + -- + -- maybe we need to apply both then + -- + if false then + -- a mark with kerning (maybe husayni needs it ) + local leftkern = pp.leftkern + if leftkern then + ox = ox - leftkern + end + end + end + else + ox = px - (pn.markx or 0) + if markdir and markdir < 0 then + if not pn.markmark then + local leftkern = pn.leftkern + if leftkern then + ox = ox + leftkern -- husayni needs it + end + end + end + if pn.checkmark then + local wn = getwidth(n) -- in arial marks have widths + if wn and wn ~= 0 then + wn = wn/2 + if trace_injections then + report_injections("correcting non zero width mark %C",getchar(n)) + end + -- -- bad: we should center + -- + -- pn.leftkern = -wn + -- pn.rightkern = -wn + -- + -- -- we're too late anyway as kerns are already injected so we do it the + -- -- ugly way (no checking if the previous is already a kern) .. 
maybe we + -- -- should fix the font instead + -- + -- todo: head and check for prev / next kern + -- + insert_node_before(n,n,fontkern(-wn)) + insert_node_after(n,n,fontkern(-wn)) + end + end + end + local oy = ny + py + (pn.marky or 0) + if not pn.markmark then + local yoffset = pn.yoffset + if yoffset then + oy = oy + yoffset -- husayni needs it + end + end + setoffsets(n,ox,oy) + if trace_marks then + showoffset(n,true) + end + end + -- begin of temp fix -- + -- local base = nil -- bah, some arabic fonts have no mark anchoring + -- end of temp fix -- + while current do + -- local next = getnext(current) + -- local char, id = ischar(current) + local next, char, id = isnextchar(current) + if char then + local p = rawget(properties,current) + -- begin of temp fix -- + -- if applyfix then + -- if not p then + -- local m = fontmarks[getfont(current)] + -- if m and m[char] then + -- if base then + -- p = { injections = { markbasenode = base } } + -- nofmarks = nofmarks + 1 + -- marks[nofmarks] = current + -- properties[current] = p + -- hasmarks = true + -- end + -- else + -- base = current + -- end + -- end + -- end + -- end of temp fix + if p then + local i = p.injections + -- begin of temp fix -- + -- if applyfix then + -- if not i then + -- local m = fontmarks[getfont(current)] + -- if m and m[char] then + -- if base then + -- i = { markbasenode = base } + -- nofmarks = nofmarks + 1 + -- marks[nofmarks] = current + -- p.injections = i + -- hasmarks = true + -- end + -- else + -- base = current + -- end + -- end + -- end + -- end of temp fix -- + if i then + local pm = i.markbasenode + -- begin of temp fix -- + -- if applyfix then + -- if not pm then + -- local m = fontmarks[getfont(current)] + -- if m and m[char] then + -- if base then + -- pm = base + -- i.markbasenode = pm + -- hasmarks = true + -- end + -- else + -- base = current + -- end + -- else + -- base = current + -- end + -- end + -- end of temp fix -- + if pm then + nofmarks = nofmarks + 1 + marks[nofmarks] = current + else + local yoffset = i.yoffset + if yoffset and yoffset ~= 0 then + setoffsets(current,false,yoffset) + end + if hascursives then + local cursivex = i.cursivex + if cursivex then + if cursiveanchor then + if cursivex ~= 0 then + i.leftkern = (i.leftkern or 0) + cursivex + end + if maxc == 0 then + minc = 1 + maxc = 1 + glyphs[1] = cursiveanchor + else + maxc = maxc + 1 + glyphs[maxc] = cursiveanchor + end + properties[cursiveanchor].cursivedy = i.cursivey -- cursiveprops + last = current + else + maxc = 0 + end + elseif maxc > 0 then + local nx, ny = getoffsets(current) + for i=maxc,minc,-1 do + local ti = glyphs[i] + ny = ny + properties[ti].cursivedy + setoffsets(ti,false,ny) -- why not add ? + if trace_cursive then + showoffset(ti) + end + end + maxc = 0 + cursiveanchor = nil + end + if i.cursiveanchor then + cursiveanchor = current -- no need for both now + else + if maxc > 0 then + local nx, ny = getoffsets(current) + for i=maxc,minc,-1 do + local ti = glyphs[i] + ny = ny + properties[ti].cursivedy + setoffsets(ti,false,ny) -- why not add ? 
+ if trace_cursive then + showoffset(ti) + end + end + maxc = 0 + end + cursiveanchor = nil + end + end + -- left|glyph|right + local leftkern = i.leftkern + local rightkern = i.rightkern + if leftkern and leftkern ~= 0 then + if rightkern and leftkern == -rightkern then + setoffsets(current,leftkern,false) + rightkern = 0 + elseif prev and getid(prev) == glue_code then + if useitalickerns then + head = insert_node_before(head,current,italickern(leftkern)) + else + setwidth(prev, getwidth(prev) + leftkern) + end + else + head = insert_node_before(head,current,fontkern(leftkern)) + end + end + if rightkern and rightkern ~= 0 then + if next and getid(next) == glue_code then + if useitalickerns then + insert_node_after(head,current,italickern(rightkern)) + else + setwidth(next, getwidth(next) + rightkern) + end + else + insert_node_after(head,current,fontkern(rightkern)) + end + end + end + else + local i = p.emptyinjections + if i then + -- glyph|disc|glyph (special case) + local rightkern = i.rightkern + if rightkern and rightkern ~= 0 then + if next and getid(next) == disc_code then + if replace then + -- error, we expect an empty one + else + replace = fontkern(rightkern) + done = true + end + end + end + end + end + if prevdisc then + if p then + local done = false + if post then + local i = p.postinjections + if i then + local leftkern = i.leftkern + if leftkern and leftkern ~= 0 then + setlink(posttail,fontkern(leftkern)) + done = true + end + end + end + if replace then + local i = p.replaceinjections + if i then + local leftkern = i.leftkern + if leftkern and leftkern ~= 0 then + setlink(replacetail,fontkern(leftkern)) + done = true + end + end + else + local i = p.emptyinjections + if i then + local leftkern = i.leftkern + if leftkern and leftkern ~= 0 then + replace = fontkern(leftkern) + done = true + end + end + end + if done then + setdisc(prevdisc,pre,post,replace) + end + end + end + else + -- cursive + if hascursives and maxc > 0 then + local nx, ny = getoffsets(current) + for i=maxc,minc,-1 do + local ti = glyphs[i] + ny = ny + properties[ti].cursivedy + local xi, yi = getoffsets(ti) + setoffsets(ti,xi,yi + ny) -- can be mark, we could use properties + end + maxc = 0 + cursiveanchor = nil + end + end + prevdisc = nil + prevglyph = current + elseif char == false then + -- base = nil + prevdisc = nil + prevglyph = current + elseif id == disc_code then + -- base = nil + pre, post, replace, pretail, posttail, replacetail = getdisc(current,true) + local done = false + if pre then + -- left|pre glyphs|right + for n in nextchar, pre do + local p = rawget(properties,n) + if p then + local i = p.injections or p.preinjections + if i then + local yoffset = i.yoffset + if yoffset and yoffset ~= 0 then + setoffsets(n,false,yoffset) + end + local leftkern = i.leftkern + if leftkern and leftkern ~= 0 then + pre = insert_node_before(pre,n,fontkern(leftkern)) + done = true + end + local rightkern = i.rightkern + if rightkern and rightkern ~= 0 then + insert_node_after(pre,n,fontkern(rightkern)) + done = true + end + if hasmarks then + local pm = i.markbasenode + if pm then + processmark(pm,n,i) + end + end + end + end + end + end + if post then + -- left|post glyphs|right + for n in nextchar, post do + local p = rawget(properties,n) + if p then + local i = p.injections or p.postinjections + if i then + local yoffset = i.yoffset + if yoffset and yoffset ~= 0 then + setoffsets(n,false,yoffset) + end + local leftkern = i.leftkern + if leftkern and leftkern ~= 0 then + post = 
insert_node_before(post,n,fontkern(leftkern)) + done = true + end + local rightkern = i.rightkern + if rightkern and rightkern ~= 0 then + insert_node_after(post,n,fontkern(rightkern)) + done = true + end + if hasmarks then + local pm = i.markbasenode + if pm then + processmark(pm,n,i) + end + end + end + end + end + end + if replace then + -- left|replace glyphs|right + for n in nextchar, replace do + local p = rawget(properties,n) + if p then + local i = p.injections or p.replaceinjections + if i then + local yoffset = i.yoffset + if yoffset and yoffset ~= 0 then + setoffsets(n,false,yoffset) + end + local leftkern = i.leftkern + if leftkern and leftkern ~= 0 then + replace = insert_node_before(replace,n,fontkern(leftkern)) + done = true + end + local rightkern = i.rightkern + if rightkern and rightkern ~= 0 then + insert_node_after(replace,n,fontkern(rightkern)) + done = true + end + if hasmarks then + local pm = i.markbasenode + if pm then + processmark(pm,n,i) + end + end + end + end + end + end + if prevglyph then + if pre then + local p = rawget(properties,prevglyph) + if p then + local i = p.preinjections + if i then + -- glyph|pre glyphs + local rightkern = i.rightkern + if rightkern and rightkern ~= 0 then + pre = insert_node_before(pre,pre,fontkern(rightkern)) + done = true + end + end + end + end + if replace then + local p = rawget(properties,prevglyph) + if p then + local i = p.replaceinjections + if i then + -- glyph|replace glyphs + local rightkern = i.rightkern + if rightkern and rightkern ~= 0 then + replace = insert_node_before(replace,replace,fontkern(rightkern)) + done = true + end + end + end + end + end + if done then + setdisc(current,pre,post,replace) + end + prevglyph = nil + prevdisc = current + else + -- base = nil + prevglyph = nil + prevdisc = nil + end + prev = current + current = next + end + -- cursive + if hascursives and maxc > 0 then + local nx, ny = getoffsets(last) + for i=maxc,minc,-1 do + local ti = glyphs[i] + ny = ny + properties[ti].cursivedy + setoffsets(ti,false,ny) -- why not add ? 
+ if trace_cursive then + showoffset(ti) + end + end + end + -- + if nofmarks > 0 then + for i=1,nofmarks do + local m = marks[i] + local p = rawget(properties,m) + local i = p.injections + local b = i.markbasenode + processmark(b,m,i) + end + elseif hasmarks then + -- sometyhing bad happened + end + -- + if keepregisteredcounts then + keepregisteredcounts = false + else + nofregisteredkerns = 0 + nofregisteredpositions = 0 + nofregisteredmarks = 0 + nofregisteredcursives = 0 + end + if trace_injections then + show_result(head) + end + return head +end + +-- space triggers + +local triggers = false + +function nodes.injections.setspacekerns(font,sequence) + if triggers then + triggers[font] = sequence + else + triggers = { [font] = sequence } + end +end + +local getthreshold + +if context then + + local threshold = 1 -- todo: add a few methods for context + local parameters = fonts.hashes.parameters + + directives.register("otf.threshold", function(v) threshold = tonumber(v) or 1 end) + + getthreshold = function(font) + local p = parameters[font] + local f = p.factor + local s = p.spacing + local t = threshold * (s and s.width or p.space or 0) - 2 + return t > 0 and t or 0, f + end + +else + + injections.threshold = 0 + + getthreshold = function(font) + local p = fontdata[font].parameters + local f = p.factor + local s = p.spacing + local t = injections.threshold * (s and s.width or p.space or 0) - 2 + return t > 0 and t or 0, f + end + +end + +injections.getthreshold = getthreshold + +function injections.isspace(n,threshold,id) + if (id or getid(n)) == glue_code then + local w = getwidth(n) + if threshold and w > threshold then -- was >= + return 32 + end + end +end + +-- We have a plugin so that Kai can use the next in plain. Such a plugin is rather application +-- specific. 
+-- +-- local getboth = nodes.direct.getboth +-- local getid = nodes.direct.getid +-- local getprev = nodes.direct.getprev +-- local getnext = nodes.direct.getnext +-- +-- local whatsit_code = nodes.nodecodes.whatsit +-- local glyph_code = nodes.nodecodes.glyph +-- +-- local function getspaceboth(n) -- fragile: what it prev/next has no width field +-- local prev, next = getboth(n) +-- while prev and (getid(prev) == whatsit_code or (getwidth(prev) == 0 and getid(prev) ~= glyph_code)) do +-- prev = getprev(prev) +-- end +-- while next and (getid(next) == whatsit_code or (getwidth(next) == 0 and getid(next) ~= glyph_code)) do +-- next = getnext(next) +-- end +-- end +-- +-- injections.installgetspaceboth(getspaceboth) + +local getspaceboth = getboth + +function injections.installgetspaceboth(gb) + getspaceboth = gb or getboth +end + +local function injectspaces(head) + + if not triggers then + return head + end + local lastfont = nil + local spacekerns = nil + local leftkerns = nil + local rightkerns = nil + local factor = 0 + local threshold = 0 + local leftkern = false + local rightkern = false + + local function updatefont(font,trig) + leftkerns = trig.left + rightkerns = trig.right + lastfont = font + threshold, + factor = getthreshold(font) + end + + for n in nextglue, head do + local prev, next = getspaceboth(n) + local prevchar = prev and ischar(prev) + local nextchar = next and ischar(next) + if nextchar then + local font = getfont(next) + local trig = triggers[font] + if trig then + if lastfont ~= font then + updatefont(font,trig) + end + if rightkerns then + rightkern = rightkerns[nextchar] + end + end + end + if prevchar then + local font = getfont(prev) + local trig = triggers[font] + if trig then + if lastfont ~= font then + updatefont(font,trig) + end + if leftkerns then + leftkern = leftkerns[prevchar] + end + end + end + if leftkern then + local old = getwidth(n) + if old > threshold then + if rightkern then + if useitalickerns then + local lnew = leftkern * factor + local rnew = rightkern * factor + if trace_spaces then + report_spaces("%C [%p + %p + %p] %C",prevchar,lnew,old,rnew,nextchar) + end + head = insert_node_before(head,n,italickern(lnew)) + insert_node_after(head,n,italickern(rnew)) + else + local new = old + (leftkern + rightkern) * factor + if trace_spaces then + report_spaces("%C [%p -> %p] %C",prevchar,old,new,nextchar) + end + setwidth(n,new) + end + rightkern = false + else + if useitalickerns then + local new = leftkern * factor + if trace_spaces then + report_spaces("%C [%p + %p]",prevchar,old,new) + end + insert_node_after(head,n,italickern(new)) -- tricky with traverse but ok + else + local new = old + leftkern * factor + if trace_spaces then + report_spaces("%C [%p -> %p]",prevchar,old,new) + end + setwidth(n,new) + end + end + end + leftkern = false + elseif rightkern then + local old = getwidth(n) + if old > threshold then + if useitalickerns then + local new = rightkern * factor + if trace_spaces then + report_spaces("[%p + %p] %C",old,new,nextchar) + end + insert_node_after(head,n,italickern(new)) + else + local new = old + rightkern * factor + if trace_spaces then + report_spaces("[%p -> %p] %C",old,new,nextchar) + end + setwidth(n,new) + end + else + -- message + end + rightkern = false + end + end + + triggers = false + + return head +end + +-- + +function injections.handler(head,where) + if triggers then + head = injectspaces(head) + end + -- todo: marks only run too + if nofregisteredmarks > 0 or nofregisteredcursives > 0 then + if 
trace_injections then + report_injections("injection variant %a","everything") + end + return inject_everything(head,where) + elseif nofregisteredpositions > 0 then + if trace_injections then + report_injections("injection variant %a","positions") + end + return inject_positions_only(head,where) + elseif nofregisteredkerns > 0 then + if trace_injections then + report_injections("injection variant %a","kerns") + end + return inject_kerns_only(head,where) + else + return head + end +end + diff --git a/tex/context/base/mkxl/font-ots.lmt b/tex/context/base/mkxl/font-ots.lmt new file mode 100644 index 000000000..b34dfa847 --- /dev/null +++ b/tex/context/base/mkxl/font-ots.lmt @@ -0,0 +1,4521 @@ +if not modules then modules = { } end modules ['font-ots'] = { -- sequences + version = 1.001, + optimize = true, + comment = "companion to font-ini.mkiv", + author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", + copyright = "PRAGMA ADE / ConTeXt Development Team", + license = "see context related readme files", +} + +--[[ldx-- +<p>This module is a bit more split up than I'd like but since we also want to test +with plain <l n='tex'/> it has to be so. This module is part of <l n='context'/> +and discussion about improvements and functionality mostly happens on the +<l n='context'/> mailing list.</p> + +<p>The specification of OpenType is (or at least decades ago was) kind of vague. +Apart from the lack of a proper free specification there's also the problem that +Microsoft and Adobe may have their own interpretation of how and in what order to +apply features. In general the Microsoft website has more detailed specifications +and is a better reference. There is also some information in the FontForge help +files. In the end we rely most on the Microsoft specification.</p> + +<p>Because there is so much possible, fonts might contain bugs and/or be made to +work with certain renderers. These may evolve over time which may have the side +effect that suddenly fonts behave differently. We don't want to catch all font +issues.</p> + +<p>After a lot of experiments (mostly by Taco, me and Idris) the first implementation +was already quite useful. When it did most of what we wanted, a more optimized version +evolved. Of course all errors are mine and of course the code can be improved. There +are quite some optimizations going on here and processing speed is currently quite +acceptable and has been improved over time. Many complex scripts are not supported +yet, but I will look into them as soon as <l n='context'/> users ask for it.</p> + +<p>The specification leaves room for interpretation. In case of doubt the Microsoft +implementation is the reference as it is the most complete one. As they deal with +lots of scripts and fonts, Kai and Ivo did a lot of testing of the generic code and +their suggestions help improve the code. I'm aware that not all border cases can be +taken care of, unless we accept excessive runtime, and even then the interference +with other mechanisms (like hyphenation) is not trivial.</p> + +<p>Especially discretionary handling has been improved much by Kai Eigner who uses complex +(latin) fonts. The current implementation is a compromise between his patches and my code +and in the meantime performance is quite ok. We cannot check all border cases without +compromising speed but so far we're okay. Given good test cases we can probably improve +it here and there.
Especially chain lookups are non trivial with discretionaries but +things got much better over time thanks to Kai.</p> + +<p>Glyphs are indexed not by unicode but in their own way. This is because there is no +relationship with unicode at all, apart from the fact that a font might cover certain +ranges of characters. One character can have multiple shapes. However, at the +<l n='tex'/> end we use unicode and all extra glyphs are mapped into a private +space. This is needed because we need to access them and <l n='tex'/> has to include +them in the output eventually.</p> + +<p>The initial data table is rather close to the open type specification and also not +that different from the one produced by <l n='fontforge'/> but we use hashes instead. +In <l n='context'/> that table is packed (similar tables are shared) and cached on disk +so that successive runs can use the optimized table (after loading the table is +unpacked).</p> + +<p>This module is sparsely documented because it has been a moving target. The +table format of the reader changed a bit over time and we experiment a lot with +different methods for supporting features. By now the structures are quite stable.</p> + +<p>Incrementing the version number will force a re-cache. We jump the number by one +when there's a fix in the reader or processing code that can result in different +results.</p> + +<p>This code is also used outside context but in context it has to work with other +mechanisms. Both put some constraints on the code here.</p> + +--ldx]]-- + +-- Remark: We assume that cursives don't cross discretionaries which is okay because it +-- is only used in semitic scripts. +-- +-- Remark: We assume that marks precede base characters. +-- +-- Remark: When complex ligatures extend into disc nodes we can get side effects. Normally +-- this doesn't happen; ff\d{l}{l}{l} in lm works but ff\d{f}{f}{f} does not. +-- +-- Todo: check if we copy attributes to disc nodes if needed. +-- +-- Todo: it would be nice if we could get rid of components. In other places we can use +-- the unicode properties. We can just keep a lua table. +-- +-- Remark: We do some disc juggling where we need to keep in mind that the pre, post and +-- replace fields can have prev pointers to a nesting node ... I wonder if that is still +-- needed. +-- +-- Remark: This is not possible: +-- +-- \discretionary {alpha-} {betagammadelta} +-- {\discretionary {alphabeta-} {gammadelta} +-- {\discretionary {alphabetagamma-} {delta} +-- {alphabetagammadelta}}} +-- +-- Remark: Something is messed up: we have two mark / ligature indices, one at the +-- injection end and one here ... this is based on KE's patches but there is something +-- fishy there as I'm pretty sure that for husayni we need some connection (as it's much +-- more complex than an average font) but I need proper examples of all cases, not of +-- only some. +-- +-- Remark: I wonder if indexed would be faster than unicoded. It would be a major +-- rewrite to have char being unicode + an index field in glyph nodes. Also more +-- assignments have to be made in order to keep things in sync. So, it's a no-go. +-- +-- Remark: We can provide a fast loop when there are no disc nodes (tests show a 1% +-- gain). Smaller functions might perform better cache-wise. But ... memory becomes +-- faster anyway, so ... +-- +-- Remark: Some optimizations made sense for 5.2 but seem less important for 5.3 but + -- anyway served their purpose.
+-- +-- Todo: just (0=l2r and 1=r2l) or maybe (r2l = true) + +-- Experiments with returning the data with the ischar are positive for lmtx but +-- have a performance hit on mkiv because there we need to wrap ischardata (pending +-- extensions to luatex which is unlikely to happen for such an experiment because +-- we then can't remove it). Actually it might make generic slightly faster. Also, +-- there are some corner cases where a data check comes before a char fetch and +-- we're talking of millions of calls there. At some point I might make a version +-- for lmtx that does it slightly different anyway. + +local type, next, tonumber = type, next, tonumber +local random = math.random +local formatters = string.formatters +local insert = table.insert + +local registertracker = trackers.register + +local logs = logs +local trackers = trackers +local nodes = nodes +local attributes = attributes +local fonts = fonts + +local otf = fonts.handlers.otf +local tracers = nodes.tracers + +local trace_singles = false registertracker("otf.singles", function(v) trace_singles = v end) +local trace_multiples = false registertracker("otf.multiples", function(v) trace_multiples = v end) +local trace_alternatives = false registertracker("otf.alternatives", function(v) trace_alternatives = v end) +local trace_ligatures = false registertracker("otf.ligatures", function(v) trace_ligatures = v end) +local trace_contexts = false registertracker("otf.contexts", function(v) trace_contexts = v end) +local trace_marks = false registertracker("otf.marks", function(v) trace_marks = v end) +local trace_kerns = false registertracker("otf.kerns", function(v) trace_kerns = v end) +local trace_cursive = false registertracker("otf.cursive", function(v) trace_cursive = v end) +local trace_preparing = false registertracker("otf.preparing", function(v) trace_preparing = v end) +local trace_bugs = false registertracker("otf.bugs", function(v) trace_bugs = v end) +local trace_details = false registertracker("otf.details", function(v) trace_details = v end) +local trace_steps = false registertracker("otf.steps", function(v) trace_steps = v end) +local trace_skips = false registertracker("otf.skips", function(v) trace_skips = v end) +local trace_plugins = false registertracker("otf.plugins", function(v) trace_plugins = v end) +local trace_chains = false registertracker("otf.chains", function(v) trace_chains = v end) + +local trace_kernruns = false registertracker("otf.kernruns", function(v) trace_kernruns = v end) +----- trace_discruns = false registertracker("otf.discruns", function(v) trace_discruns = v end) +local trace_compruns = false registertracker("otf.compruns", function(v) trace_compruns = v end) +local trace_testruns = false registertracker("otf.testruns", function(v) trace_testruns = v end) + +local forcediscretionaries = false +local forcepairadvance = false -- for testing + +directives.register("otf.forcediscretionaries",function(v) + forcediscretionaries = v +end) + +directives.register("otf.forcepairadvance",function(v) + forcepairadvance = v +end) + +local report_direct = logs.reporter("fonts","otf direct") +local report_subchain = logs.reporter("fonts","otf subchain") +local report_chain = logs.reporter("fonts","otf chain") +local report_process = logs.reporter("fonts","otf process") +local report_warning = logs.reporter("fonts","otf warning") +local report_run = logs.reporter("fonts","otf run") + +registertracker("otf.substitutions", "otf.singles","otf.multiples","otf.alternatives","otf.ligatures") 
+registertracker("otf.positions", "otf.marks","otf.kerns","otf.cursive") +registertracker("otf.actions", "otf.substitutions","otf.positions") +registertracker("otf.sample", "otf.steps","otf.substitutions","otf.positions","otf.analyzing") +registertracker("otf.sample.silent", "otf.steps=silent","otf.substitutions","otf.positions","otf.analyzing") + +local nuts = nodes.nuts + +local getnext = nuts.getnext +local setnext = nuts.setnext +local getprev = nuts.getprev +local setprev = nuts.setprev +local getboth = nuts.getboth +local setboth = nuts.setboth +local getid = nuts.getid +local getstate = nuts.getstate +local getsubtype = nuts.getsubtype +local setsubtype = nuts.setsubtype +local getchar = nuts.getchar +local setchar = nuts.setchar +local getdisc = nuts.getdisc +local setdisc = nuts.setdisc +local getreplace = nuts.getreplace +local setlink = nuts.setlink +local getwidth = nuts.getwidth + +local getglyphdata = nuts.getglyphdata + +--------------------------------------------------------------------------------------- + +-- Beware: In ConTeXt components no longer are real components. We only keep track of +-- their positions because some complex ligatures might need that. For the moment we +-- use an x_ prefix because for now generic follows the other approach. + +local copy_no_components = nuts.copy_no_components +local copy_only_glyphs = nuts.copy_only_glyphs +local count_components = nuts.count_components +local set_components = nuts.set_components +local get_components = nuts.get_components +local flush_components = nuts.flush_components + +--------------------------------------------------------------------------------------- + +local ischar = nuts.ischar +local isnextchar = nuts.isnextchar -- only makes sense when we know we need next +local isprevchar = nuts.isprevchar -- only makes sense when we know we need prev +local usesfont = nuts.uses_font + +local insert_node_after = nuts.insert_after +local copy_node = nuts.copy +local copy_node_list = nuts.copy_list +local remove_node = nuts.remove +local find_node_tail = nuts.tail +local flush_node_list = nuts.flush_list +local flush_node = nuts.flush_node +local end_of_math = nuts.end_of_math +local start_of_par = nuts.start_of_par + +local setmetatable = setmetatable +local setmetatableindex = table.setmetatableindex + +local nextnode = nuts.traversers.node + +----- zwnj = 0x200C +----- zwj = 0x200D + +local nodecodes = nodes.nodecodes +local glyphcodes = nodes.glyphcodes +local disccodes = nodes.disccodes + +local glyph_code = nodecodes.glyph +local glue_code = nodecodes.glue +local disc_code = nodecodes.disc +local math_code = nodecodes.math +local dir_code = nodecodes.dir +local par_code = nodecodes.par + +local lefttoright_code = nodes.dirvalues.lefttoright +local righttoleft_code = nodes.dirvalues.righttoleft + +local discretionarydisc_code = disccodes.discretionary +local ligatureglyph_code = glyphcodes.ligature + +local injections = nodes.injections +local setmark = injections.setmark +local setcursive = injections.setcursive +local setkern = injections.setkern +local setmove = injections.setmove +local setposition = injections.setposition +local resetinjection = injections.reset +local copyinjection = injections.copy +local setligaindex = injections.setligaindex +local getligaindex = injections.getligaindex + +local fontdata = fonts.hashes.identifiers +local fontfeatures = fonts.hashes.features + +local otffeatures = fonts.constructors.features.otf +local registerotffeature = otffeatures.register + +local onetimemessage = 
fonts.loggers.onetimemessage or function() end + +local getrandom = utilities and utilities.randomizer and utilities.randomizer.get + +otf.defaultnodealternate = "none" -- first last + +-- We use a few semi-global variables. The handler can be called nested but this assumes +-- that the same font is used. + +local tfmdata = false +local characters = false +local descriptions = false +local marks = false +local classes = false +local currentfont = false +local factor = 0 +local threshold = 0 +local checkmarks = false + +local discs = false +local spaces = false + +local sweepnode = nil +local sweephead = { } -- we don't nil entries but false them (no collection and such) + +local notmatchpre = { } -- to be checked: can we use false instead of nil / what if a == b tests +local notmatchpost = { } -- to be checked: can we use false instead of nil / what if a == b tests +local notmatchreplace = { } -- to be checked: can we use false instead of nil / what if a == b tests + +local handlers = { } + +local isspace = injections.isspace +local getthreshold = injections.getthreshold + +local checkstep = (tracers and tracers.steppers.check) or function() end +local registerstep = (tracers and tracers.steppers.register) or function() end +local registermessage = (tracers and tracers.steppers.message) or function() end + +-- local function checkdisccontent(d) +-- local pre, post, replace = getdisc(d) +-- if pre then for n in traverse_id(glue_code,pre) do report("pre: %s",nodes.idstostring(pre)) break end end +-- if post then for n in traverse_id(glue_code,post) do report("pos: %s",nodes.idstostring(post)) break end end +-- if replace then for n in traverse_id(glue_code,replace) do report("rep: %s",nodes.idstostring(replace)) break end end +-- end + +local function logprocess(...) + if trace_steps then + registermessage(...) + if trace_steps == "silent" then + return + end + end + report_direct(...) +end + +local function logwarning(...) + report_direct(...) 
+end + +local gref do + + local f_unicode = formatters["U+%X"] -- was ["%U"] + local f_uniname = formatters["U+%X (%s)"] -- was ["%U (%s)"] + local f_unilist = formatters["% t"] + + gref = function(n) -- currently the same as in font-otb + if type(n) == "number" then + local description = descriptions[n] + local name = description and description.name + if name then + return f_uniname(n,name) + else + return f_unicode(n) + end + elseif n then + local t = { } + for i=1,#n do + local ni = n[i] + if tonumber(ni) then -- later we will start at 2 + local di = descriptions[ni] + local nn = di and di.name + if nn then + t[#t+1] = f_uniname(ni,nn) + else + t[#t+1] = f_unicode(ni) + end + end + end + return f_unilist(t) + else + return "<error in node mode tracing>" + end + end + +end + +local function cref(dataset,sequence,index) + if not dataset then + return "no valid dataset" + end + local merged = sequence.merged and "merged " or "" + if index then + return formatters["feature %a, type %a, %schain lookup %a, index %a"]( + dataset[4],sequence.type,merged,sequence.name,index) + else + return formatters["feature %a, type %a, %schain lookup %a"]( + dataset[4],sequence.type,merged,sequence.name) + end +end + +local function pref(dataset,sequence) + return formatters["feature %a, type %a, %slookup %a"]( + dataset[4],sequence.type,sequence.merged and "merged " or "",sequence.name) +end + +local function mref(rlmode) + if not rlmode or rlmode >= 0 then + return "l2r" + else + return "r2l" + end +end + +-- The next code is somewhat complicated by the fact that some fonts can have ligatures made +-- from ligatures that themselves have marks. This was identified by Kai in for instance +-- arabtype: KAF LAM SHADDA ALEF FATHA (0x0643 0x0644 0x0651 0x0627 0x064E). This becomes +-- KAF LAM-ALEF with a SHADDA on the first and a FATHA on the second component. In the next +-- iteration this becomes a KAF-LAM-ALEF with a SHADDA on the second and a FATHA on the +-- third component. + +-- We can assume that languages that use marks are not hyphenated. We can also assume +-- that at most one discretionary is present. + +-- We do need components in funny kerning mode but maybe I can better reconstruct them +-- as we do have the font components info available; removing components makes the +-- previous code much simpler. Also, later on copying and freeing becomes easier. +-- However, for arabic we need to keep them around for the sake of mark placement + -- and indices.
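-- Illustration (an editorial sketch, not part of the patch): the component index that
-- the ligature builders below record on surviving marks is what the mark handlers use
-- later to pick the anchor of the proper ligature component. For the arabtype case
-- above, the SHADDA ends up pointing at the second and the FATHA at the third
-- component of the KAF-LAM-ALEF, so later on roughly this happens (here "mark" and
-- "base" stand for the mark glyph and the ligature node):
--
--   local index = getligaindex(mark)        -- e.g. 2 for the shadda, 3 for the fatha
--   local ba    = markanchors[1][basechar]  -- per-component baselig anchors
--   setmark(mark,base,factor,rlmode,ba[index],markanchors[2],characters[basechar],false,checkmarks)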
+ +local function flattendisk(head,disc) + local pre, post, replace, pretail, posttail, replacetail = getdisc(disc,true) + local prev, next = getboth(disc) + local ishead = head == disc + setdisc(disc) + flush_node(disc) + if pre then + flush_node_list(pre) + end + if post then + flush_node_list(post) + end + if ishead then + if replace then + if next then + setlink(replacetail,next) + end + return replace, replace + elseif next then + return next, next + else + -- return -- maybe warning + end + else + if replace then + if next then + setlink(replacetail,next) + end + setlink(prev,replace) + return head, replace + else + setlink(prev,next) -- checks for next anyway + return head, next + end + end +end + +local function appenddisc(disc,list) + local pre, post, replace, pretail, posttail, replacetail = getdisc(disc,true) + local posthead = list + local replacehead = copy_node_list(list) + if post then + setlink(posttail,posthead) + else + post = posthead + end + if replace then + setlink(replacetail,replacehead) + else + replace = replacehead + end + setdisc(disc,pre,post,replace) +end + +local function markstoligature(head,start,stop,char) + if start == stop and getchar(start) == char then + return head, start + else + local prev = getprev(start) + local next = getnext(stop) + setprev(start) + setnext(stop) + local base = copy_no_components(start,copyinjection) + if head == start then + head = base + end + resetinjection(base) + setchar(base,char) + setsubtype(base,ligatureglyph_code) + set_components(base,start) + setlink(prev,base,next) + flush_components(start) + return head, base + end +end + +-- Remark for Kai: (some arabic fonts do mark + mark = other mark and such) +-- +-- The hasmarks is needed for ligatures of marks that are part of a ligature in +-- which case we assume that we can delete the marks anyway (we can always become +-- more clever if needed) .. in fact the whole logic here should be redone. We're +-- in the not discfound branch then. We now have skiphash too so we can be more +-- selective if needed (todo). + +-- we can have more granularity here but for now we only do a simple check + +local no_left_ligature_code = 1 +local no_right_ligature_code = 2 +local no_left_kern_code = 4 +local no_right_kern_code = 8 + +local has_glyph_option = nuts.has_glyph_option + +-- in lmtx we need to check the components and can be slightly more clever + +local function toligature(head,start,stop,char,dataset,sequence,skiphash,discfound,hasmarks) -- brr head + if has_glyph_option(start,no_right_ligature_code) then + return head, start + end + if start == stop and getchar(start) == char then + resetinjection(start) + setchar(start,char) + return head, start + end + local prev = getprev(start) + local next = getnext(stop) + local comp = start + setprev(start) + setnext(stop) + local base = copy_no_components(start,copyinjection) + if start == head then + head = base + end + resetinjection(base) + setchar(base,char) + setsubtype(base,ligatureglyph_code) + set_components(base,comp) + setlink(prev,base,next) + if not discfound then + local deletemarks = not skiphash or hasmarks + local components = start -- not used + local baseindex = 0 + local componentindex = 0 + local head = base + local current = base + -- first we loop over the glyphs in start ... 
stop + while start do + local char = getchar(start) + if not marks[char] then + baseindex = baseindex + componentindex + componentindex = count_components(start,marks) + -- we can be more clever here: "not deletemarks or (skiphash and not skiphash[char])" + -- and such: + elseif not deletemarks then + -- we can get a loop when the font expects otherwise (i.e. unexpected deletemarks) + setligaindex(start,baseindex + getligaindex(start,componentindex)) + if trace_marks then + logwarning("%s: keep ligature mark %s, gets index %s",pref(dataset,sequence),gref(char),getligaindex(start)) + end + local n = copy_node(start) + copyinjection(n,start) -- is this ok ? we position later anyway + head, current = insert_node_after(head,current,n) -- unlikely that mark has components + elseif trace_marks then + logwarning("%s: delete ligature mark %s",pref(dataset,sequence),gref(char)) + end + start = getnext(start) + end + -- we can have one accent as part of a lookup and another following + local start = getnext(current) + while start do + local char = ischar(start) + if char then + -- also something skiphash here? + if marks[char] then + setligaindex(start,baseindex + getligaindex(start,componentindex)) + if trace_marks then + logwarning("%s: set ligature mark %s, gets index %s",pref(dataset,sequence),gref(char),getligaindex(start)) + end + start = getnext(start) + else + break + end + else + break + end + end + flush_components(components) + else + -- discfound ... forget about marks .. probably no scripts that hyphenate and have marks + local discprev, discnext = getboth(discfound) + if discprev and discnext then + -- we assume normalization in context, and don't care about generic ... especially + -- \- can give problems as there we can have a negative char but that won't match + -- anyway + local pre, post, replace, pretail, posttail, replacetail = getdisc(discfound,true) + if not replace then + -- looks like we never come here as it's not okay + local prev = getprev(base) + -- local comp = get_components(base) -- already set + local copied = copy_only_glyphs(comp) + if pre then + setlink(discprev,pre) + else + setnext(discprev) -- also blocks funny assignments + end + pre = comp -- is start + if post then + setlink(posttail,discnext) + setprev(post) -- nil anyway + else + post = discnext + setprev(discnext) -- also blocks funny assignments + end + setlink(prev,discfound,next) + setboth(base) + -- here components have a pointer so we can't free it! 
+ set_components(base,copied) + replace = base + if forcediscretionaries then + setdisc(discfound,pre,post,replace,discretionarydisc_code) + else + setdisc(discfound,pre,post,replace) + end + base = prev + end + end + end + return head, base +end + +local function multiple_glyphs(head,start,multiple,skiphash,what,stop) -- what to do with skiphash matches here + local nofmultiples = #multiple + if nofmultiples > 0 then + resetinjection(start) + setchar(start,multiple[1]) + if nofmultiples > 1 then + local sn = getnext(start) + for k=2,nofmultiples do + -- untested: + -- + -- while ignoremarks and marks[getchar(sn)] then + -- local sn = getnext(sn) + -- end + local n = copy_node(start) -- ignore components + resetinjection(n) + setchar(n,multiple[k]) + insert_node_after(head,start,n) + start = n + end + if what == true then + -- we're ok + elseif what > 1 then + local m = multiple[nofmultiples] + for i=2,what do + local n = copy_node(start) -- ignore components + resetinjection(n) + setchar(n,m) + insert_node_after(head,start,n) + start = n + end + end + end + return head, start, true + else + if trace_multiples then + logprocess("no multiple for %s",gref(getchar(start))) + end + return head, start, false + end +end + +local function get_alternative_glyph(start,alternatives,value) + local n = #alternatives + if n == 1 then + -- we could actually change that into a gsub and save some memory in the + -- font loader but it makes tracing more messy + return alternatives[1], trace_alternatives and "1 (only one present)" + elseif value == "random" then + local r = getrandom and getrandom("glyph",1,n) or random(1,n) + return alternatives[r], trace_alternatives and formatters["value %a, taking %a"](value,r) + elseif value == "first" then + return alternatives[1], trace_alternatives and formatters["value %a, taking %a"](value,1) + elseif value == "last" then + return alternatives[n], trace_alternatives and formatters["value %a, taking %a"](value,n) + end + value = value == true and 1 or tonumber(value) + if type(value) ~= "number" then + return alternatives[1], trace_alternatives and formatters["invalid value %s, taking %a"](value,1) + end + -- local a = alternatives[value] + -- if a then + -- -- some kind of hash + -- return a, trace_alternatives and formatters["value %a, taking %a"](value,a) + -- end + if value > n then + local defaultalt = otf.defaultnodealternate + if defaultalt == "first" then + return alternatives[n], trace_alternatives and formatters["invalid value %s, taking %a"](value,1) + elseif defaultalt == "last" then + return alternatives[1], trace_alternatives and formatters["invalid value %s, taking %a"](value,n) + else + return false, trace_alternatives and formatters["invalid value %a, %s"](value,"out of range") + end + elseif value == 0 then + return getchar(start), trace_alternatives and formatters["invalid value %a, %s"](value,"no change") + elseif value < 1 then + return alternatives[1], trace_alternatives and formatters["invalid value %a, taking %a"](value,1) + else + return alternatives[value], trace_alternatives and formatters["value %a, taking %a"](value,value) + end +end + +-- handlers + +function handlers.gsub_single(head,start,dataset,sequence,replacement) + if trace_singles then + logprocess("%s: replacing %s by single %s",pref(dataset,sequence),gref(getchar(start)),gref(replacement)) + end + resetinjection(start) + setchar(start,replacement) + return head, start, true +end + +function handlers.gsub_alternate(head,start,dataset,sequence,alternative) + local kind = 
dataset[4] + local what = dataset[1] + local value = what == true and tfmdata.shared.features[kind] or what + local choice, comment = get_alternative_glyph(start,alternative,value) + if choice then + if trace_alternatives then + logprocess("%s: replacing %s by alternative %a to %s, %s",pref(dataset,sequence),gref(getchar(start)),gref(choice),comment) + end + resetinjection(start) + setchar(start,choice) + else + if trace_alternatives then + logwarning("%s: no variant %a for %s, %s",pref(dataset,sequence),value,gref(getchar(start)),comment) + end + end + return head, start, true +end + +function handlers.gsub_multiple(head,start,dataset,sequence,multiple,rlmode,skiphash) + if trace_multiples then + logprocess("%s: replacing %s by multiple %s",pref(dataset,sequence),gref(getchar(start)),gref(multiple)) + end + return multiple_glyphs(head,start,multiple,skiphash,dataset[1]) +end + +-- Don't we deal with disc otherwise now? I need to check if the next one can be +-- simplified. Anyway, it can be way messier: marks that get removed as well as +-- marks that are kept. + +function handlers.gsub_ligature(head,start,dataset,sequence,ligature,rlmode,skiphash) + local current = getnext(start) + if not current then + return head, start, false, nil + end + local stop = nil + local startchar = getchar(start) + if skiphash and skiphash[startchar] then + while current do + -- local char = ischar(current,currentfont) + local nxt, char = isnextchar(current,currentfont) + if char then + local lg = ligature[char] + if lg then + stop = current + ligature = lg + -- current = getnext(current) + current = nxt + else + break + end + else + break + end + end + if stop then + local lig = ligature.ligature + if lig then + if trace_ligatures then + local stopchar = getchar(stop) + head, start = markstoligature(head,start,stop,lig) + logprocess("%s: replacing %s upto %s by ligature %s case 1",pref(dataset,sequence),gref(startchar),gref(stopchar),gref(getchar(start))) + else + head, start = markstoligature(head,start,stop,lig) + end + return head, start, true, false + else + -- ok, goto next lookup + end + end + else + local discfound = false + local hasmarks = marks[startchar] + while current do + local char, id = ischar(current,currentfont) + if char then + if skiphash and skiphash[char] then + current = getnext(current) + else + local lg = ligature[char] + if lg then + if marks[char] then + hasmarks = true + end + stop = current -- needed for fake so outside then + ligature = lg + current = getnext(current) + else + break + end + end + elseif char == false then + -- kind of weird + break + elseif id == disc_code then + discfound = current + break + else + break + end + end + -- of{f-}{}{f}e o{f-}{}{f}fe o{-}{}{ff}e (oe and ff ligature) + -- we can end up here when we have a start run .. 
testruns start at a disc but + -- so here we have the other case: char + disc + if discfound then + -- don't assume marks in a disc and we don't run over a disc (for now) + local pre, post, replace = getdisc(discfound) + local match + if replace then + local char = ischar(replace,currentfont) + if char and ligature[char] then + match = true + end + end + if not match and pre then + local char = ischar(pre,currentfont) + if char and ligature[char] then + match = true + end + end + if not match and not pre or not replace then + local n = getnext(discfound) + local char = ischar(n,currentfont) + if char and ligature[char] then + match = true + end + end + if match then + -- we force a restart + local ishead = head == start + local prev = getprev(start) + if stop then + setnext(stop) + local copy = copy_node_list(start) + local tail = stop -- was: getprev(stop) -- Kai: needs checking on your samples + local liat = find_node_tail(copy) + if pre then + setlink(liat,pre) + end + if replace then + setlink(tail,replace) + end + pre = copy + replace = start + else + setnext(start) + local copy = copy_node(start) + if pre then + setlink(copy,pre) + end + if replace then + setlink(start,replace) + end + pre = copy + replace = start + end + setdisc(discfound,pre,post,replace) + if prev then + setlink(prev,discfound) + else + setprev(discfound) + head = discfound + end + start = discfound + return head, start, true, true + end + end + local lig = ligature.ligature + if lig then + if stop then + if trace_ligatures then + local stopchar = getchar(stop) + -- head, start = toligature(head,start,stop,lig,dataset,sequence,skiphash,discfound,hasmarks) + head, start = toligature(head,start,stop,lig,dataset,sequence,skiphash,false,hasmarks) + logprocess("%s: replacing %s upto %s by ligature %s case 2",pref(dataset,sequence),gref(startchar),gref(stopchar),gref(lig)) + else + -- head, start = toligature(head,start,stop,lig,dataset,sequence,skiphash,discfound,hasmarks) + head, start = toligature(head,start,stop,lig,dataset,sequence,skiphash,false,hasmarks) + end + else + -- weird but happens (in some arabic font) + resetinjection(start) + setchar(start,lig) + if trace_ligatures then + logprocess("%s: replacing %s by (no real) ligature %s case 3",pref(dataset,sequence),gref(startchar),gref(lig)) + end + end + return head, start, true, false + else + -- weird but happens, pseudo ligatures ... 
just the components + end + end + return head, start, false, false +end + +function handlers.gpos_single(head,start,dataset,sequence,kerns,rlmode,skiphash,step,injection) + if has_glyph_option(start,no_right_kern_code) then + return head, start, false + else + local startchar = getchar(start) + local format = step.format + if format == "single" or type(kerns) == "table" then -- the table check can go + local dx, dy, w, h = setposition(0,start,factor,rlmode,kerns,injection) + if trace_kerns then + logprocess("%s: shifting single %s by %s xy (%p,%p) and wh (%p,%p)",pref(dataset,sequence),gref(startchar),format,dx,dy,w,h) + end + else + local k = (format == "move" and setmove or setkern)(start,factor,rlmode,kerns,injection) + if trace_kerns then + logprocess("%s: shifting single %s by %s %p",pref(dataset,sequence),gref(startchar),format,k) + end + end + return head, start, true + end +end + +function handlers.gpos_pair(head,start,dataset,sequence,kerns,rlmode,skiphash,step,injection) + if has_glyph_option(start,no_right_kern_code) then + return head, start, false + else + local snext = getnext(start) + if not snext then + return head, start, false + else + local prev = start + while snext do + local nextchar = ischar(snext,currentfont) + if nextchar then + if skiphash and skiphash[nextchar] then -- includes marks too when flag + prev = snext + snext = getnext(snext) + else + local krn = kerns[nextchar] + if not krn then + break + end + local format = step.format + if format == "pair" then + local a = krn[1] + local b = krn[2] + if a == true then + -- zero + elseif a then -- #a > 0 + local x, y, w, h = setposition(1,start,factor,rlmode,a,injection) + if trace_kerns then + local startchar = getchar(start) + logprocess("%s: shifting first of pair %s and %s by xy (%p,%p) and wh (%p,%p) as %s",pref(dataset,sequence),gref(startchar),gref(nextchar),x,y,w,h,injection or "injections") + end + end + if b == true then + -- zero + start = snext -- cf spec + elseif b then -- #b > 0 + local x, y, w, h = setposition(2,snext,factor,rlmode,b,injection) + if trace_kerns then + local startchar = getchar(start) + logprocess("%s: shifting second of pair %s and %s by xy (%p,%p) and wh (%p,%p) as %s",pref(dataset,sequence),gref(startchar),gref(nextchar),x,y,w,h,injection or "injections") + end + start = snext -- cf spec + elseif forcepairadvance then + start = snext -- for testing, not cf spec + end + return head, start, true + elseif krn ~= 0 then + local k = (format == "move" and setmove or setkern)(snext,factor,rlmode,krn,injection) + if trace_kerns then + logprocess("%s: inserting %s %p between %s and %s as %s",pref(dataset,sequence),format,k,gref(getchar(prev)),gref(nextchar),injection or "injections") + end + return head, start, true + else -- can't happen + break + end + end + else + break + end + end + return head, start, false + end + end +end + +--[[ldx-- +<p>We get hits on a mark, but we're not sure if the it has to be applied so +we need to explicitly test for basechar, baselig and basemark entries.</p> +--ldx]]-- + +function handlers.gpos_mark2base(head,start,dataset,sequence,markanchors,rlmode,skiphash) + local markchar = getchar(start) + if marks[markchar] then + local base = getprev(start) -- [glyph] [start=mark] + if base then + local basechar = ischar(base,currentfont) + if basechar then + if marks[basechar] then + while base do + base = getprev(base) + if base then + basechar = ischar(base,currentfont) + if basechar then + if not marks[basechar] then + break + end + else + if trace_bugs then + 
logwarning("%s: no base for mark %s, case %i",pref(dataset,sequence),gref(markchar),1) + end + return head, start, false + end + else + if trace_bugs then + logwarning("%s: no base for mark %s, case %i",pref(dataset,sequence),gref(markchar),2) + end + return head, start, false + end + end + end + local ba = markanchors[1][basechar] + if ba then + local ma = markanchors[2] + local dx, dy, bound = setmark(start,base,factor,rlmode,ba,ma,characters[basechar],false,checkmarks) + if trace_marks then + logprocess("%s, bound %s, anchoring mark %s to basechar %s => (%p,%p)", + pref(dataset,sequence),bound,gref(markchar),gref(basechar),dx,dy) + end + return head, start, true + elseif trace_bugs then + -- onetimemessage(currentfont,basechar,"no base anchors",report_fonts) + logwarning("%s: mark %s is not anchored to %s",pref(dataset,sequence),gref(markchar),gref(basechar)) + end + elseif trace_bugs then + logwarning("%s: nothing preceding, case %i",pref(dataset,sequence),1) + end + elseif trace_bugs then + logwarning("%s: nothing preceding, case %i",pref(dataset,sequence),2) + end + elseif trace_bugs then + logwarning("%s: mark %s is no mark",pref(dataset,sequence),gref(markchar)) + end + return head, start, false +end + +function handlers.gpos_mark2ligature(head,start,dataset,sequence,markanchors,rlmode,skiphash) + local markchar = getchar(start) + if marks[markchar] then + local base = getprev(start) -- [glyph] [optional marks] [start=mark] + if base then + local basechar = ischar(base,currentfont) + if basechar then + if marks[basechar] then + while base do + base = getprev(base) + if base then + basechar = ischar(base,currentfont) + if basechar then + if not marks[basechar] then + break + end + else + if trace_bugs then + logwarning("%s: no base for mark %s, case %i",pref(dataset,sequence),gref(markchar),1) + end + return head, start, false + end + else + if trace_bugs then + logwarning("%s: no base for mark %s, case %i",pref(dataset,sequence),gref(markchar),2) + end + return head, start, false + end + end + end + local ba = markanchors[1][basechar] + if ba then + local ma = markanchors[2] + if ma then + local index = getligaindex(start) + ba = ba[index] + if ba then + local dx, dy, bound = setmark(start,base,factor,rlmode,ba,ma,characters[basechar],false,checkmarks) + if trace_marks then + logprocess("%s, index %s, bound %s, anchoring mark %s to baselig %s at index %s => (%p,%p)", + pref(dataset,sequence),index,bound,gref(markchar),gref(basechar),index,dx,dy) + end + return head, start, true + else + if trace_bugs then + logwarning("%s: no matching anchors for mark %s and baselig %s with index %a",pref(dataset,sequence),gref(markchar),gref(basechar),index) + end + end + end + elseif trace_bugs then + -- logwarning("%s: char %s is missing in font",pref(dataset,sequence),gref(basechar)) + onetimemessage(currentfont,basechar,"no base anchors",report_fonts) + end + elseif trace_bugs then + logwarning("%s: prev node is no char, case %i",pref(dataset,sequence),1) + end + elseif trace_bugs then + logwarning("%s: prev node is no char, case %i",pref(dataset,sequence),2) + end + elseif trace_bugs then + logwarning("%s: mark %s is no mark",pref(dataset,sequence),gref(markchar)) + end + return head, start, false +end + +function handlers.gpos_mark2mark(head,start,dataset,sequence,markanchors,rlmode,skiphash) + local markchar = getchar(start) + if marks[markchar] then + local base = getprev(start) -- [glyph] [basemark] [start=mark] + local slc = getligaindex(start) + if slc then -- a rather messy loop ... 
needs checking with husayni + while base do + local blc = getligaindex(base) + if blc and blc ~= slc then + base = getprev(base) + else + break + end + end + end + if base then + local basechar = ischar(base,currentfont) + if basechar then -- subtype test can go + local ba = markanchors[1][basechar] -- slot 1 has been made copy of the class hash + if ba then + local ma = markanchors[2] + local dx, dy, bound = setmark(start,base,factor,rlmode,ba,ma,characters[basechar],true,checkmarks) + if trace_marks then + logprocess("%s, bound %s, anchoring mark %s to basemark %s => (%p,%p)", + pref(dataset,sequence),bound,gref(markchar),gref(basechar),dx,dy) + end + return head, start, true + end + end + end + elseif trace_bugs then + logwarning("%s: mark %s is no mark",pref(dataset,sequence),gref(markchar)) + end + return head, start, false +end + +function handlers.gpos_cursive(head,start,dataset,sequence,exitanchors,rlmode,skiphash,step) -- to be checked + local startchar = getchar(start) + if marks[startchar] then + if trace_cursive then + logprocess("%s: ignoring cursive for mark %s",pref(dataset,sequence),gref(startchar)) + end + else + local nxt = getnext(start) + while nxt do + local nextchar = ischar(nxt,currentfont) + if not nextchar then + break + elseif marks[nextchar] then -- always sequence.flags[1] + nxt = getnext(nxt) + else + local exit = exitanchors[3] + if exit then + local entry = exitanchors[1][nextchar] + if entry then + entry = entry[2] + if entry then + local r2lflag = sequence.flags[4] -- mentioned in the standard + local dx, dy, bound = setcursive(start,nxt,factor,rlmode,exit,entry,characters[startchar],characters[nextchar],r2lflag) + if trace_cursive then + logprocess("%s: moving %s to %s cursive (%p,%p) using bound %s in %s mode",pref(dataset,sequence),gref(startchar),gref(nextchar),dx,dy,bound,mref(rlmode)) + end + return head, start, true + end + end + end + break + end + end + end + return head, start, false +end + +--[[ldx-- +<p>I will implement multiple chain replacements once I run into a font that uses +it. It's not that complex to handle.</p> +--ldx]]-- + +local chainprocs = { } + +local function logprocess(...) + if trace_steps then + registermessage(...) + if trace_steps == "silent" then + return + end + end + report_subchain(...) +end + +local logwarning = report_subchain + +local function logprocess(...) + if trace_steps then + registermessage(...) + if trace_steps == "silent" then + return + end + end + report_chain(...) +end + +local logwarning = report_chain + +-- We could share functions but that would lead to extra function calls with many +-- arguments, redundant tests and confusing messages. + +-- The reversesub is a special case, which is why we need to store the replacements +-- in a bit weird way. There is no lookup and the replacement comes from the lookup +-- itself. It is meant mostly for dealing with Urdu. 
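+--
+-- Purely as illustration (the codepoints below are made up for this note and do
+-- not come from a real font): the replacements table maps the matched character
+-- directly onto its replacement,
+--
+--   local replacements = { [0x0641] = 0xFED2, [0x0642] = 0xFED6 }
+--
+-- and reversesub simply indexes it with the current glyph's character.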
+ +local function reversesub(head,start,stop,dataset,sequence,replacements,rlmode,skiphash) + local char = getchar(start) + local replacement = replacements[char] + if replacement then + if trace_singles then + logprocess("%s: single reverse replacement of %s by %s",cref(dataset,sequence),gref(char),gref(replacement)) + end + resetinjection(start) + setchar(start,replacement) + return head, start, true + else + return head, start, false + end +end + + +chainprocs.reversesub = reversesub + +--[[ldx-- +<p>This chain stuff is somewhat tricky since we can have a sequence of actions to be +applied: single, alternate, multiple or ligature where ligature can be an invalid +one in the sense that it will replace multiple by one but not neccessary one that +looks like the combination (i.e. it is the counterpart of multiple then). For +example, the following is valid:</p> + +<typing> +<line>xxxabcdexxx [single a->A][multiple b->BCD][ligature cde->E] xxxABCDExxx</line> +</typing> + +<p>Therefore we we don't really do the replacement here already unless we have the +single lookup case. The efficiency of the replacements can be improved by deleting +as less as needed but that would also make the code even more messy.</p> +--ldx]]-- + +--[[ldx-- +<p>Here we replace start by a single variant.</p> +--ldx]]-- + +-- To be done (example needed): what if > 1 steps + +-- this is messy: do we need this disc checking also in alternaties? + +local function reportzerosteps(dataset,sequence) + logwarning("%s: no steps",cref(dataset,sequence)) +end + +local function reportmoresteps(dataset,sequence) + logwarning("%s: more than 1 step",cref(dataset,sequence)) +end + +-- local function reportbadsteps(dataset,sequence) +-- logwarning("%s: bad step, no proper return values",cref(dataset,sequence)) +-- end + +local function getmapping(dataset,sequence,currentlookup) + local steps = currentlookup.steps + local nofsteps = currentlookup.nofsteps + if nofsteps == 0 then + reportzerosteps(dataset,sequence) + currentlookup.mapping = false + return false + else + if nofsteps > 1 then + reportmoresteps(dataset,sequence) + end + local mapping = steps[1].coverage + currentlookup.mapping = mapping + currentlookup.format = steps[1].format + return mapping + end +end + +function chainprocs.gsub_remove(head,start,stop,dataset,sequence,currentlookup,rlmode,skiphash,chainindex) + if trace_chains then + logprocess("%s: removing character %s",cref(dataset,sequence,chainindex),gref(getchar(start))) + end + head, start = remove_node(head,start,true) + return head, getprev(start), true +end + +function chainprocs.gsub_single(head,start,stop,dataset,sequence,currentlookup,rlmode,skiphash,chainindex) + local mapping = currentlookup.mapping + if mapping == nil then + mapping = getmapping(dataset,sequence,currentlookup) + end + if mapping then + local current = start + while current do + local currentchar = ischar(current) + if currentchar then + local replacement = mapping[currentchar] + if not replacement or replacement == "" then + if trace_bugs then + logwarning("%s: no single for %s",cref(dataset,sequence,chainindex),gref(currentchar)) + end + else + if trace_singles then + logprocess("%s: replacing single %s by %s",cref(dataset,sequence,chainindex),gref(currentchar),gref(replacement)) + end + resetinjection(current) + setchar(current,replacement) + end + return head, start, true + elseif currentchar == false then + -- can't happen + break + elseif current == stop then + break + else + current = getnext(current) + end + end + end + return 
head, start, false +end + +--[[ldx-- +<p>Here we replace start by new glyph. First we delete the rest of the match.</p> +--ldx]]-- + +-- char_1 mark_1 -> char_x mark_1 (ignore marks) +-- char_1 mark_1 -> char_x + +-- to be checked: do we always have just one glyph? +-- we can also have alternates for marks +-- marks come last anyway +-- are there cases where we need to delete the mark + +function chainprocs.gsub_alternate(head,start,stop,dataset,sequence,currentlookup,rlmode,skiphash,chainindex) + local mapping = currentlookup.mapping + if mapping == nil then + mapping = getmapping(dataset,sequence,currentlookup) + end + if mapping then + local kind = dataset[4] + local what = dataset[1] + local value = what == true and tfmdata.shared.features[kind] or what -- todo: optimize in ctx + local current = start + while current do + local currentchar = ischar(current) + if currentchar then + local alternatives = mapping[currentchar] + if alternatives then + local choice, comment = get_alternative_glyph(current,alternatives,value) + if choice then + if trace_alternatives then + logprocess("%s: replacing %s by alternative %a to %s, %s",cref(dataset,sequence),gref(currentchar),choice,gref(choice),comment) + end + resetinjection(start) + setchar(start,choice) + else + if trace_alternatives then + logwarning("%s: no variant %a for %s, %s",cref(dataset,sequence),value,gref(currentchar),comment) + end + end + end + return head, start, true + elseif currentchar == false then + -- can't happen + break + elseif current == stop then + break + else + current = getnext(current) + end + end + end + return head, start, false +end + +--[[ldx-- +<p>Here we replace start by a sequence of new glyphs.</p> +--ldx]]-- + +function chainprocs.gsub_multiple(head,start,stop,dataset,sequence,currentlookup,rlmode,skiphash,chainindex) + local mapping = currentlookup.mapping + if mapping == nil then + mapping = getmapping(dataset,sequence,currentlookup) + end + if mapping then + local startchar = getchar(start) + local replacement = mapping[startchar] + if not replacement or replacement == "" then + if trace_bugs then + logwarning("%s: no multiple for %s",cref(dataset,sequence),gref(startchar)) + end + else + if trace_multiples then + logprocess("%s: replacing %s by multiple characters %s",cref(dataset,sequence),gref(startchar),gref(replacement)) + end + return multiple_glyphs(head,start,replacement,skiphash,dataset[1],stop) + end + end + return head, start, false +end + +--[[ldx-- +<p>When we replace ligatures we use a helper that handles the marks. I might change +this function (move code inline and handle the marks by a separate function). We +assume rather stupid ligatures (no complex disc nodes).</p> +--ldx]]-- + +-- compare to handlers.gsub_ligature which is more complex ... why + +function chainprocs.gsub_ligature(head,start,stop,dataset,sequence,currentlookup,rlmode,skiphash,chainindex) + local mapping = currentlookup.mapping + if mapping == nil then + mapping = getmapping(dataset,sequence,currentlookup) + end + if mapping then + local startchar = getchar(start) + local ligatures = mapping[startchar] + if not ligatures then + if trace_bugs then + logwarning("%s: no ligatures starting with %s",cref(dataset,sequence,chainindex),gref(startchar)) + end + else + local hasmarks = marks[startchar] + local current = getnext(start) + local discfound = false + local last = stop + local nofreplacements = 1 + while current do + -- todo: ischar ... can there really be disc nodes here? 
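+                -- Walk the glyphs that were matched: descend into the ligature tree
+                -- per component, skip glyphs that are in the skip hash (marks),
+                -- remember a discretionary when we run into one, and count how many
+                -- glyphs we consume so the caller can advance over them; the final
+                -- replacement, if any, sits in the ligature field of the (sub)table
+                -- we end up in.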
+ local id = getid(current) + if id == disc_code then + if not discfound then + discfound = current + end + if current == stop then + break -- okay? or before the disc + else + current = getnext(current) + end + else + local schar = getchar(current) + if skiphash and skiphash[schar] then -- marks + -- if current == stop then -- maybe add this + -- break + -- else + current = getnext(current) + -- end + else + local lg = ligatures[schar] + if lg then + ligatures = lg + last = current + nofreplacements = nofreplacements + 1 + if marks[char] then + hasmarks = true + end + if current == stop then + break + else + current = getnext(current) + end + else + break + end + end + end + end + local ligature = ligatures.ligature + if ligature then + if chainindex then + stop = last + end + if trace_ligatures then + if start == stop then + logprocess("%s: replacing character %s by ligature %s case 3",cref(dataset,sequence,chainindex),gref(startchar),gref(ligature)) + else + logprocess("%s: replacing character %s upto %s by ligature %s case 4",cref(dataset,sequence,chainindex),gref(startchar),gref(getchar(stop)),gref(ligature)) + end + end + head, start = toligature(head,start,stop,ligature,dataset,sequence,skiphash,discfound,hasmarks) + return head, start, true, nofreplacements, discfound + elseif trace_bugs then + if start == stop then + logwarning("%s: replacing character %s by ligature fails",cref(dataset,sequence,chainindex),gref(startchar)) + else + logwarning("%s: replacing character %s upto %s by ligature fails",cref(dataset,sequence,chainindex),gref(startchar),gref(getchar(stop))) + end + end + end + end + return head, start, false, 0, false +end + +function chainprocs.gpos_single(head,start,stop,dataset,sequence,currentlookup,rlmode,skiphash,chainindex) + if has_glyph_option(start,no_right_kern_code) then + local mapping = currentlookup.mapping + if mapping == nil then + mapping = getmapping(dataset,sequence,currentlookup) + end + if mapping then + local startchar = getchar(start) + local kerns = mapping[startchar] + if kerns then + local format = currentlookup.format + if format == "single" then + local dx, dy, w, h = setposition(0,start,factor,rlmode,kerns) -- currentlookup.flags ? + if trace_kerns then + logprocess("%s: shifting single %s by %s (%p,%p) and correction (%p,%p)",cref(dataset,sequence),gref(startchar),format,dx,dy,w,h) + end + else -- needs checking .. maybe no kerns format for single + local k = (format == "move" and setmove or setkern)(start,factor,rlmode,kerns,injection) + if trace_kerns then + logprocess("%s: shifting single %s by %s %p",cref(dataset,sequence),gref(startchar),format,k) + end + end + return head, start, true + end + end + end + return head, start, false +end + +function chainprocs.gpos_pair(head,start,stop,dataset,sequence,currentlookup,rlmode,skiphash,chainindex) -- todo: injections ? 
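+    -- As in the non-chained gpos_pair handler above: kerns maps the next character
+    -- either onto a plain kern value or, in the "pair" format, onto { a, b } where
+    -- a positions the first glyph of the pair and b the second one.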
+ if has_glyph_option(start,no_right_kern_code) then + local mapping = currentlookup.mapping + if mapping == nil then + mapping = getmapping(dataset,sequence,currentlookup) + end + if mapping then + local snext = getnext(start) + if snext then + local startchar = getchar(start) + local kerns = mapping[startchar] -- always 1 step + if kerns then + local prev = start + while snext do + local nextchar = ischar(snext,currentfont) + if not nextchar then + break + end + if skiphash and skiphash[nextchar] then + prev = snext + snext = getnext(snext) + else + local krn = kerns[nextchar] + if not krn then + break + end + local format = currentlookup.format + if format == "pair" then + local a = krn[1] + local b = krn[2] + if a == true then + -- zero + elseif a then + local x, y, w, h = setposition(1,start,factor,rlmode,a,"injections") -- currentlookups flags? + if trace_kerns then + local startchar = getchar(start) + logprocess("%s: shifting first of pair %s and %s by (%p,%p) and correction (%p,%p)",cref(dataset,sequence),gref(startchar),gref(nextchar),x,y,w,h) + end + end + if b == true then + -- zero + start = snext -- cf spec + elseif b then -- #b > 0 + local x, y, w, h = setposition(2,snext,factor,rlmode,b,"injections") + if trace_kerns then + local startchar = getchar(start) + logprocess("%s: shifting second of pair %s and %s by (%p,%p) and correction (%p,%p)",cref(dataset,sequence),gref(startchar),gref(nextchar),x,y,w,h) + end + start = snext -- cf spec + elseif forcepairadvance then + start = snext -- for testing, not cf spec + end + return head, start, true + elseif krn ~= 0 then + local k = (format == "move" and setmove or setkern)(snext,factor,rlmode,krn) + if trace_kerns then + logprocess("%s: inserting %s %p between %s and %s",cref(dataset,sequence),format,k,gref(getchar(prev)),gref(nextchar)) + end + return head, start, true + else + break + end + end + end + end + end + end + end + return head, start, false +end + +function chainprocs.gpos_mark2base(head,start,stop,dataset,sequence,currentlookup,rlmode,skiphash,chainindex) + local mapping = currentlookup.mapping + if mapping == nil then + mapping = getmapping(dataset,sequence,currentlookup) + end + if mapping then + local markchar = getchar(start) + if marks[markchar] then + local markanchors = mapping[markchar] -- always 1 step + if markanchors then + local base = getprev(start) -- [glyph] [start=mark] + if base then + local basechar = ischar(base,currentfont) + if basechar then + if marks[basechar] then + while base do + base = getprev(base) + if base then + local basechar = ischar(base,currentfont) + if basechar then + if not marks[basechar] then + break + end + else + if trace_bugs then + logwarning("%s: no base for mark %s, case %i",pref(dataset,sequence),gref(markchar),1) + end + return head, start, false + end + else + if trace_bugs then + logwarning("%s: no base for mark %s, case %i",pref(dataset,sequence),gref(markchar),2) + end + return head, start, false + end + end + end + local ba = markanchors[1][basechar] + if ba then + local ma = markanchors[2] + if ma then + local dx, dy, bound = setmark(start,base,factor,rlmode,ba,ma,characters[basechar],false,checkmarks) + if trace_marks then + logprocess("%s, bound %s, anchoring mark %s to basechar %s => (%p,%p)", + cref(dataset,sequence),bound,gref(markchar),gref(basechar),dx,dy) + end + return head, start, true + end + end + elseif trace_bugs then + logwarning("%s: prev node is no char, case %i",cref(dataset,sequence),1) + end + elseif trace_bugs then + logwarning("%s: prev node 
is no char, case %i",cref(dataset,sequence),2) + end + elseif trace_bugs then + logwarning("%s: mark %s has no anchors",cref(dataset,sequence),gref(markchar)) + end + elseif trace_bugs then + logwarning("%s: mark %s is no mark",cref(dataset,sequence),gref(markchar)) + end + end + return head, start, false +end + +function chainprocs.gpos_mark2ligature(head,start,stop,dataset,sequence,currentlookup,rlmode,skiphash,chainindex) + local mapping = currentlookup.mapping + if mapping == nil then + mapping = getmapping(dataset,sequence,currentlookup) + end + if mapping then + local markchar = getchar(start) + if marks[markchar] then + local markanchors = mapping[markchar] -- always 1 step + if markanchors then + local base = getprev(start) -- [glyph] [optional marks] [start=mark] + if base then + local basechar = ischar(base,currentfont) + if basechar then + if marks[basechar] then + while base do + base = getprev(base) + if base then + local basechar = ischar(base,currentfont) + if basechar then + if not marks[basechar] then + break + end + else + if trace_bugs then + logwarning("%s: no base for mark %s, case %i",cref(dataset,sequence),markchar,1) + end + return head, start, false + end + else + if trace_bugs then + logwarning("%s: no base for mark %s, case %i",cref(dataset,sequence),markchar,2) + end + return head, start, false + end + end + end + local ba = markanchors[1][basechar] + if ba then + local ma = markanchors[2] + if ma then + local index = getligaindex(start) + ba = ba[index] + if ba then + local dx, dy, bound = setmark(start,base,factor,rlmode,ba,ma,characters[basechar],false,checkmarks) + if trace_marks then + logprocess("%s, bound %s, anchoring mark %s to baselig %s at index %s => (%p,%p)", + cref(dataset,sequence),a or bound,gref(markchar),gref(basechar),index,dx,dy) + end + return head, start, true + end + end + end + elseif trace_bugs then + logwarning("%s, prev node is no char, case %i",cref(dataset,sequence),1) + end + elseif trace_bugs then + logwarning("%s, prev node is no char, case %i",cref(dataset,sequence),2) + end + elseif trace_bugs then + logwarning("%s, mark %s has no anchors",cref(dataset,sequence),gref(markchar)) + end + elseif trace_bugs then + logwarning("%s, mark %s is no mark",cref(dataset,sequence),gref(markchar)) + end + end + return head, start, false +end + +function chainprocs.gpos_mark2mark(head,start,stop,dataset,sequence,currentlookup,rlmode,skiphash,chainindex) + local mapping = currentlookup.mapping + if mapping == nil then + mapping = getmapping(dataset,sequence,currentlookup) + end + if mapping then + local markchar = getchar(start) + if marks[markchar] then + local markanchors = mapping[markchar] -- always 1 step + if markanchors then + local base = getprev(start) -- [glyph] [basemark] [start=mark] + local slc = getligaindex(start) + if slc then -- a rather messy loop ... 
needs checking with husayni + while base do + local blc = getligaindex(base) + if blc and blc ~= slc then + base = getprev(base) + else + break + end + end + end + if base then -- subtype test can go + local basechar = ischar(base,currentfont) + if basechar then + local ba = markanchors[1][basechar] + if ba then + local ma = markanchors[2] + if ma then + local dx, dy, bound = setmark(start,base,factor,rlmode,ba,ma,characters[basechar],true,checkmarks) + if trace_marks then + logprocess("%s, bound %s, anchoring mark %s to basemark %s => (%p,%p)", + cref(dataset,sequence),bound,gref(markchar),gref(basechar),dx,dy) + end + return head, start, true + end + end + elseif trace_bugs then + logwarning("%s: prev node is no mark, case %i",cref(dataset,sequence),1) + end + elseif trace_bugs then + logwarning("%s: prev node is no mark, case %i",cref(dataset,sequence),2) + end + elseif trace_bugs then + logwarning("%s: mark %s has no anchors",cref(dataset,sequence),gref(markchar)) + end + elseif trace_bugs then + logwarning("%s: mark %s is no mark",cref(dataset,sequence),gref(markchar)) + end + end + return head, start, false +end + +function chainprocs.gpos_cursive(head,start,stop,dataset,sequence,currentlookup,rlmode,skiphash,chainindex) + local mapping = currentlookup.mapping + if mapping == nil then + mapping = getmapping(dataset,sequence,currentlookup) + end + if mapping then + local startchar = getchar(start) + local exitanchors = mapping[startchar] -- always 1 step + if exitanchors then + if marks[startchar] then + if trace_cursive then + logprocess("%s: ignoring cursive for mark %s",pref(dataset,sequence),gref(startchar)) + end + else + local nxt = getnext(start) + while nxt do + local nextchar = ischar(nxt,currentfont) + if not nextchar then + break + elseif marks[nextchar] then + -- should not happen (maybe warning) + nxt = getnext(nxt) + else + local exit = exitanchors[3] + if exit then + local entry = exitanchors[1][nextchar] + if entry then + entry = entry[2] + if entry then + local r2lflag = sequence.flags[4] -- mentioned in the standard + local dx, dy, bound = setcursive(start,nxt,factor,rlmode,exit,entry,characters[startchar],characters[nextchar],r2lflag) + if trace_cursive then + logprocess("%s: moving %s to %s cursive (%p,%p) using bound %s in %s mode",pref(dataset,sequence),gref(startchar),gref(nextchar),dx,dy,bound,mref(rlmode)) + end + return head, start, true + end + end + elseif trace_bugs then + onetimemessage(currentfont,startchar,"no entry anchors",report_fonts) + end + break + end + end + end + elseif trace_cursive and trace_details then + logprocess("%s, cursive %s is already done",pref(dataset,sequence),gref(getchar(start)),alreadydone) + end + end + return head, start, false +end + +-- what pointer to return, spec says stop +-- to be discussed ... is bidi changer a space? +-- elseif char == zwnj and sequence[n][32] then -- brrr + +local function show_skip(dataset,sequence,char,ck,class) + logwarning("%s: skipping char %s, class %a, rule %a, lookuptype %a",cref(dataset,sequence),gref(char),class,ck[1],ck[8] or ck[2]) +end + +-- A previous version had disc collapsing code in the (single sub) handler plus some +-- checking in the main loop, but that left the pre/post sequences undone. The best +-- solution is to add some checking there and backtrack when a replace/post matches +-- but it takes a bit of work to figure out an efficient way (this is what the +-- sweep* names refer to). I might look into that variant one day again as it can +-- replace some other code too. 
In that approach we can have a special version for +-- gub and pos which gains some speed. This method does the test and passes info to +-- the handlers. Here collapsing is handled in the main loop which also makes code +-- elsewhere simpler (i.e. no need for the other special runners and disc code in +-- ligature building). I also experimented with pushing preceding glyphs sequences +-- in the replace/pre fields beforehand which saves checking afterwards but at the +-- cost of duplicate glyphs (memory) but it's too much overhead (runtime). +-- +-- In the meantime Kai had moved the code from the single chain into a more general +-- handler and this one (renamed to chaindisk) is used now. I optimized the code a +-- bit and brought it in sycn with the other code. Hopefully I didn't introduce +-- errors. Note: this somewhat complex approach is meant for fonts that implement +-- (for instance) ligatures by character replacement which to some extend is not +-- that suitable for hyphenation. I also use some helpers. This method passes some +-- states but reparses the list. There is room for a bit of speed up but that will +-- be done in the context version. (In fact a partial rewrite of all code can bring +-- some more efficiency.) +-- +-- I didn't test it with extremes but successive disc nodes still can give issues +-- but in order to handle that we need more complex code which also slows down even +-- more. The main loop variant could deal with that: test, collapse, backtrack. + +local userkern = nuts.pool and nuts.pool.newkern -- context + +do if not userkern then -- generic + + local thekern = nuts.new("kern",1) -- userkern + local setkern = nuts.setkern -- not injections.setkern + + userkern = function(k) + local n = copy_node(thekern) + setkern(n,k) + return n + end + +end end + +local function checked(head) + local current = head + while current do + if getid(current) == glue_code then + local kern = userkern(getwidth(current)) + if head == current then + local next = getnext(current) + if next then + setlink(kern,next) + end + flush_node(current) + head = kern + current = next + else + local prev, next = getboth(current) + setlink(prev,kern,next) + flush_node(current) + current = next + end + else + current = getnext(current) + end + end + return head +end + +local function setdiscchecked(d,pre,post,replace) + if pre then pre = checked(pre) end + if post then post = checked(post) end + if replace then replace = checked(replace) end + setdisc(d,pre,post,replace) +end + +local noflags = { false, false, false, false } + +local function chainrun(head,start,last,dataset,sequence,rlmode,skiphash,ck) + + local size = ck[5] - ck[4] + 1 + local chainlookups = ck[6] + local done = false + + -- current match + if chainlookups then + -- Lookups can be like { 1, false, 3 } or { false, 2 } or basically anything and + -- #lookups can be less than #current + + if size == 1 then + + -- if nofchainlookups > size then + -- -- bad rules + -- end + + local chainlookup = chainlookups[1] + for j=1,#chainlookup do + local chainstep = chainlookup[j] + local chainkind = chainstep.type + local chainproc = chainprocs[chainkind] + if chainproc then + local ok + head, start, ok = chainproc(head,start,last,dataset,sequence,chainstep,rlmode,skiphash) + if ok then + done = true + end + else + logprocess("%s: %s is not yet supported (1)",cref(dataset,sequence),chainkind) + end + end + + else + + -- See LookupType 5: Contextual Substitution Subtable. Now it becomes messy. 
The + -- easiest case is where #current maps on #lookups i.e. one-to-one. But what if + -- we have a ligature. Cf the spec we then need to advance one character but we + -- really need to test it as there are fonts out there that are fuzzy and have + -- too many lookups: + -- + -- U+1105 U+119E U+1105 U+119E : sourcehansansklight: script=hang ccmp=yes + -- + -- Even worse are these family emoji shapes as they can have multiple lookups + -- per slot (probably only for gpos). + + -- It's very unlikely that we will have skip classes here but still ... we seldom + -- enter this branch anyway. + + local i = 1 + local laststart = start + local nofchainlookups = #chainlookups -- useful? + while start do + if skiphash then -- hm, so we know we skip some + while start do + local char = ischar(start,currentfont) + if char then + if skiphash and skiphash[char] then + start = getnext(start) + else + break + end + else + break + end + end + end + local chainlookup = chainlookups[i] + if chainlookup then + for j=1,#chainlookup do + local chainstep = chainlookup[j] + local chainkind = chainstep.type + local chainproc = chainprocs[chainkind] + if chainproc then + local ok, n + head, start, ok, n = chainproc(head,start,last,dataset,sequence,chainstep,rlmode,skiphash,i) + -- messy since last can be changed ! + if ok then + done = true + if n and n > 1 and i + n > nofchainlookups then + -- this is a safeguard, we just ignore the rest of the lookups + i = size -- prevents an advance + break + end + end + else + -- actually an error + logprocess("%s: %s is not yet supported (2)",cref(dataset,sequence),chainkind) + end + end + else + -- we skip but we could also delete as option .. what does an empty lookup actually mean + -- in opentype ... anyway, we could map it onto gsub_remove if needed + end + i = i + 1 + if i > size or not start then + break + elseif start then + laststart = start + start = getnext(start) + end + end + if not start then + start = laststart + end + + end + else + -- todo: needs checking for holes in the replacements + local replacements = ck[7] + if replacements then + head, start, done = reversesub(head,start,last,dataset,sequence,replacements,rlmode,skiphash) + else + done = true + if trace_contexts then + logprocess("%s: skipping match",cref(dataset,sequence)) + end + end + end + return head, start, done +end + +local function chaindisk(head,start,dataset,sequence,rlmode,skiphash,ck) + + if not start then + return head, start, false + end + + local startishead = start == head + local seq = ck[3] + local f = ck[4] + local l = ck[5] + local s = #seq + local done = false + local sweepnode = sweepnode + local sweeptype = sweeptype + local sweepoverflow = false + local checkdisc = getprev(head) + local keepdisc = not sweepnode + local lookaheaddisc = nil + local backtrackdisc = nil + local current = start + local last = start + local prev = getprev(start) + local hasglue = false + + -- fishy: so we can overflow and then go on in the sweep? 
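+    -- Roughly what happens below: we scan the current part of the rule (f..l),
+    -- then the lookahead (l+1..s) and finally the backtrack (f-1..1), diving into
+    -- disc fields where needed and remembering a lookahead or backtrack disc. If
+    -- such a disc was seen, the rule is rerun via chainrun on the relevant pre and
+    -- replace variants around it and the pieces are put back with setdisc (or
+    -- setdiscchecked when glue got turned into kerns); otherwise chainrun is
+    -- applied to the plain text once.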
+ -- todo : id can also be glue_code as we checked spaces + + local i = f + while i <= l do + local id = getid(current) + if id == glyph_code then + i = i + 1 + last = current + current = getnext(current) + elseif id == glue_code then + i = i + 1 + last = current + current = getnext(current) + hasglue = true + elseif id == disc_code then + if keepdisc then + keepdisc = false + lookaheaddisc = current + local replace = getreplace(current) + if not replace then + sweepoverflow = true + sweepnode = current + current = getnext(current) + else + -- we can use an iterator + while replace and i <= l do + if getid(replace) == glyph_code then + i = i + 1 + end + replace = getnext(replace) + end + current = getnext(replace) + end + last = current + else + head, current = flattendisk(head,current) + end + else + last = current + current = getnext(current) + end + if current then + -- go on + elseif sweepoverflow then + -- we already are following up on sweepnode + break + elseif sweeptype == "post" or sweeptype == "replace" then + current = getnext(sweepnode) + if current then + sweeptype = nil + sweepoverflow = true + else + break + end + else + break -- added + end + end + + if sweepoverflow then + local prev = current and getprev(current) + if not current or prev ~= sweepnode then + local head = getnext(sweepnode) + local tail = nil + if prev then + tail = prev + setprev(current,sweepnode) + else + tail = find_node_tail(head) + end + setnext(sweepnode,current) + setprev(head) + setnext(tail) + appenddisc(sweepnode,head) + end + end + + if l < s then + local i = l + local t = sweeptype == "post" or sweeptype == "replace" + while current and i < s do + local id = getid(current) + if id == glyph_code then + i = i + 1 + current = getnext(current) + elseif id == glue_code then + i = i + 1 + current = getnext(current) + hasglue = true + elseif id == disc_code then + if keepdisc then + keepdisc = false + if notmatchpre[current] ~= notmatchreplace[current] then + lookaheaddisc = current + end + -- we assume a simple text only replace (we could use nuts.count) + local replace = getreplace(current) + while replace and i < s do + if getid(replace) == glyph_code then + i = i + 1 + end + replace = getnext(replace) + end + current = getnext(current) + elseif notmatchpre[current] ~= notmatchreplace[current] then + head, current = flattendisk(head,current) + else + current = getnext(current) -- HH + end + else + current = getnext(current) + end + if not current and t then + current = getnext(sweepnode) + if current then + sweeptype = nil + end + end + end + end + + if f > 1 then + local current = prev + local i = f + local t = sweeptype == "pre" or sweeptype == "replace" + if not current and t and current == checkdisc then + current = getprev(sweepnode) + end + while current and i > 1 do -- missing getprev added / moved outside + local id = getid(current) + if id == glyph_code then + i = i - 1 + elseif id == glue_code then + i = i - 1 + hasglue = true + elseif id == disc_code then + if keepdisc then + keepdisc = false + if notmatchpost[current] ~= notmatchreplace[current] then + backtrackdisc = current + end + -- we assume a simple text only replace (we could use nuts.count) + local replace = getreplace(current) + while replace and i > 1 do + if getid(replace) == glyph_code then + i = i - 1 + end + replace = getnext(replace) + end + elseif notmatchpost[current] ~= notmatchreplace[current] then + head, current = flattendisk(head,current) + end + end + current = getprev(current) + if t and current == checkdisc then 
+ current = getprev(sweepnode) + end + end + end + local done = false + + if lookaheaddisc then + + local cf = start + local cl = getprev(lookaheaddisc) + local cprev = getprev(start) + local insertedmarks = 0 + + while cprev do + local char = ischar(cf,currentfont) + if char and marks[char] then + insertedmarks = insertedmarks + 1 + cf = cprev + startishead = cf == head + cprev = getprev(cprev) + else + break + end + end + setlink(cprev,lookaheaddisc) + setprev(cf) + setnext(cl) + if startishead then + head = lookaheaddisc + end + local pre, post, replace = getdisc(lookaheaddisc) + local new = copy_node_list(cf) -- br, how often does that happen + local cnew = new + if pre then + setlink(find_node_tail(cf),pre) + end + if replace then + local tail = find_node_tail(new) + setlink(tail,replace) + end + for i=1,insertedmarks do + cnew = getnext(cnew) + end + cl = start + local clast = cnew + for i=f,l do + cl = getnext(cl) + clast = getnext(clast) + end + if not notmatchpre[lookaheaddisc] then + local ok = false + cf, start, ok = chainrun(cf,start,cl,dataset,sequence,rlmode,skiphash,ck) + if ok then + done = true + end + end + if not notmatchreplace[lookaheaddisc] then + local ok = false + new, cnew, ok = chainrun(new,cnew,clast,dataset,sequence,rlmode,skiphash,ck) + if ok then + done = true + end + end + if hasglue then + setdiscchecked(lookaheaddisc,cf,post,new) + else + setdisc(lookaheaddisc,cf,post,new) + end + start = getprev(lookaheaddisc) + sweephead[cf] = getnext(clast) or false + sweephead[new] = getnext(cl) or false + + elseif backtrackdisc then + + local cf = getnext(backtrackdisc) + local cl = start + local cnext = getnext(start) + local insertedmarks = 0 + + while cnext do + local char = ischar(cnext,currentfont) + if char and marks[char] then + insertedmarks = insertedmarks + 1 + cl = cnext + cnext = getnext(cnext) + else + break + end + end + setlink(backtrackdisc,cnext) + setprev(cf) + setnext(cl) + local pre, post, replace, pretail, posttail, replacetail = getdisc(backtrackdisc,true) + local new = copy_node_list(cf) + local cnew = find_node_tail(new) + for i=1,insertedmarks do + cnew = getprev(cnew) + end + local clast = cnew + for i=f,l do + clast = getnext(clast) + end + if not notmatchpost[backtrackdisc] then + local ok = false + cf, start, ok = chainrun(cf,start,last,dataset,sequence,rlmode,skiphash,ck) + if ok then + done = true + end + end + if not notmatchreplace[backtrackdisc] then + local ok = false + new, cnew, ok = chainrun(new,cnew,clast,dataset,sequence,rlmode,skiphash,ck) + if ok then + done = true + end + end + if post then + setlink(posttail,cf) + else + post = cf + end + if replace then + setlink(replacetail,new) + else + replace = new + end + if hasglue then + setdiscchecked(backtrackdisc,pre,post,replace) + else + setdisc(backtrackdisc,pre,post,replace) + end + start = getprev(backtrackdisc) + sweephead[post] = getnext(clast) or false + sweephead[replace] = getnext(last) or false + + else + + local ok = false + head, start, ok = chainrun(head,start,last,dataset,sequence,rlmode,skiphash,ck) + if ok then + done = true + end + + end + + return head, start, done +end + +local function chaintrac(head,start,dataset,sequence,rlmode,skiphash,ck,match,discseen,sweepnode) + local rule = ck[1] + local lookuptype = ck[8] or ck[2] + local nofseq = #ck[3] + local first = ck[4] + local last = ck[5] + local char = getchar(start) + logwarning("%s: rule %s %s at char %s for (%s,%s,%s) chars, lookuptype %a, %sdisc seen, %ssweeping", + cref(dataset,sequence),rule,match and 
"matches" or "nomatch", + gref(char),first-1,last-first+1,nofseq-last,lookuptype, + discseen and "" or "no ", sweepnode and "" or "not ") +end + +-- The next one is quite optimized but still somewhat slow, fonts like ebgaramond +-- are real torture tests because they have many steps with one context (having +-- multiple contexts makes more sense) also because we (can) reduce them. Instead of +-- a match boolean variable and check for that I decided to use a goto with labels +-- instead. This is one of the cases where it makes the code more readable and we +-- might even gain a bit performance. + +-- when we have less replacements (lookups) then current matches we can push too much into +-- the previous disc .. such be it (<before><disc><current=fl><after> with only f done) + +local function handle_contextchain(head,start,dataset,sequence,contexts,rlmode,skiphash) + -- optimizing for rlmode gains nothing + local sweepnode = sweepnode + local sweeptype = sweeptype + local postreplace + local prereplace + local checkdisc + local discseen -- = false + if sweeptype then + if sweeptype == "replace" then + postreplace = true + prereplace = true + else + postreplace = sweeptype == "post" + prereplace = sweeptype == "pre" + end + checkdisc = getprev(head) + end + local currentfont = currentfont + + local skipped -- = false + + local startprev, + startnext = getboth(start) + local done -- = false + + -- we can have multiple hits and as we scan (currently) all we need to check + -- if we have a match ... contextchains have no real coverage table (with + -- unique entries) + + -- fonts can have many steps (each doing one check) or many contexts + + -- todo: make a per-char cache so that we have small contexts (when we have a context + -- n == 1 and otherwise it can be more so we can even distingish n == 1 or more) + + local nofcontexts = contexts.n -- #contexts + + local startchar = nofcontext == 1 or ischar(start,currentfont) -- only needed in a chain + + for k=1,nofcontexts do -- does this disc mess work well with n > 1 + + local ck = contexts[k] + local seq = ck[3] + local f = ck[4] -- first current + if not startchar or not seq[f][startchar] then + -- report("no hit in %a at %i of %i contexts",sequence.type,k,nofcontexts) + goto next + end + local s = seq.n -- or #seq + local l = ck[5] -- last current + local current = start + local last = start + + if s == 1 then + goto next + end + + -- current match + + if l > f then + -- before/current/after | before/current | current/after + local discfound -- = nil + local n = f + 1 + last = startnext -- the second in current (first already matched) + while n <= l do + if postreplace and not last then + last = getnext(sweepnode) + sweeptype = nil + end + if last then + -- local char, id = ischar(last,currentfont) + local nxt, char, id = isnextchar(last,currentfont) + if char then + if skiphash and skiphash[char] then + skipped = true + if trace_skips then + show_skip(dataset,sequence,char,ck,classes[char]) + end + -- last = getnext(last) + last = nxt + elseif seq[n][char] then + if n < l then + -- last = getnext(last) + last = nxt + end + n = n + 1 + elseif discfound then + notmatchreplace[discfound] = true + if notmatchpre[discfound] then + goto next + else + break + end + else + goto next + end + elseif char == false then + if discfound then + notmatchreplace[discfound] = true + if notmatchpre[discfound] then + goto next + else + break + end + else + goto next + end + elseif id == disc_code then + -- elseif id == disc_code and (not discs or discs[last]) 
then + discseen = true + discfound = last + notmatchpre[last] = nil + notmatchpost[last] = true + notmatchreplace[last] = nil + local pre, post, replace = getdisc(last) + if pre then + local n = n + while pre do + if seq[n][getchar(pre)] then + n = n + 1 + if n > l then + break + end + pre = getnext(pre) + else + notmatchpre[last] = true + break + end + end + if n <= l then + notmatchpre[last] = true + end + else + notmatchpre[last] = true + end + if replace then + -- so far we never entered this branch + while replace do + if seq[n][getchar(replace)] then + n = n + 1 + if n > l then + break + end + replace = getnext(replace) + else + notmatchreplace[last] = true + if notmatchpre[last] then + goto next + else + break + end + end + end + -- why here again + if notmatchpre[last] then + goto next + end + end + -- maybe only if match + -- last = getnext(last) + last = nxt + else + goto next + end + else + goto next + end + end + end + + -- before + + if f > 1 then + if startprev then + local prev = startprev + if prereplace and prev == checkdisc then + prev = getprev(sweepnode) + end + if prev then + local discfound -- = nil + local n = f - 1 + while n >= 1 do + if prev then + -- local char, id = ischar(prev,currentfont) + local prv, char, id = isprevchar(prev,currentfont) + if char then + if skiphash and skiphash[char] then + skipped = true + if trace_skips then + show_skip(dataset,sequence,char,ck,classes[char]) + end + -- prev = getprev(prev) + prev = prv + elseif seq[n][char] then + if n > 1 then + -- prev = getprev(prev) + prev = prv + end + n = n - 1 + elseif discfound then + notmatchreplace[discfound] = true + if notmatchpost[discfound] then + goto next + else + break + end + else + goto next + end + elseif char == false then + if discfound then + notmatchreplace[discfound] = true + if notmatchpost[discfound] then + goto next + end + else + goto next + end + break + elseif id == disc_code then + -- elseif id == disc_code and (not discs or discs[prev]) then + -- the special case: f i where i becomes dottless i .. + discseen = true + discfound = prev + notmatchpre[prev] = true + notmatchpost[prev] = nil + notmatchreplace[prev] = nil + local pre, post, replace, pretail, posttail, replacetail = getdisc(prev,true) + -- weird test: needs checking + if pre ~= start and post ~= start and replace ~= start then + if post then + local n = n + while posttail do + if seq[n][getchar(posttail)] then + n = n - 1 + if posttail == post or n < 1 then + break + else + posttail = getprev(posttail) + end + else + notmatchpost[prev] = true + break + end + end + if n >= 1 then + notmatchpost[prev] = true + end + else + notmatchpost[prev] = true + end + if replace then + -- we seldom enter this branch (e.g. 
on brill efficient) + while replacetail do + if seq[n][getchar(replacetail)] then + n = n - 1 + if replacetail == replace or n < 1 then + break + else + replacetail = getprev(replacetail) + end + else + notmatchreplace[prev] = true + if notmatchpost[prev] then + goto next + else + break + end + end + end + else + -- notmatchreplace[prev] = true -- not according to Kai + end + end + -- prev = getprev(prev) + prev = prv + -- elseif id == glue_code and seq[n][32] and isspace(prev,threshold,id) then + -- elseif seq[n][32] and spaces[prev] then + -- n = n - 1 + -- -- prev = getprev(prev) + -- prev = prv + elseif id == glue_code then + local sn = seq[n] + if (sn[32] and spaces[prev]) or sn[0xFFFC] then + n = n - 1 + -- prev = getprev(prev) + prev = prv + else + goto next + end + elseif seq[n][0xFFFC] then + n = n - 1 + -- prev = getprev(prev) + prev = prv + else + goto next + end + else + goto next + end + end + else + goto next + end + else + goto next + end + end + + -- after + + if s > l then + local current = last and getnext(last) + if not current and postreplace then + current = getnext(sweepnode) + end + if current then + local discfound -- = nil + local n = l + 1 + while n <= s do + if current then + -- local char, id = ischar(current,currentfont) + local nxt, char, id = isnextchar(current,currentfont) + if char then + if skiphash and skiphash[char] then + skipped = true + if trace_skips then + show_skip(dataset,sequence,char,ck,classes[char]) + end + -- current = getnext(current) -- was absent + current = nxt -- was absent + elseif seq[n][char] then + if n < s then -- new test + current = getnext(current) -- was absent + end + n = n + 1 + elseif discfound then + notmatchreplace[discfound] = true + if notmatchpre[discfound] then + goto next + else + break + end + else + goto next + end + elseif char == false then + if discfound then + notmatchreplace[discfound] = true + if notmatchpre[discfound] then + goto next + else + break + end + else + goto next + end + elseif id == disc_code then + -- elseif id == disc_code and (not discs or discs[current]) then + discseen = true + discfound = current + notmatchpre[current] = nil + notmatchpost[current] = true + notmatchreplace[current] = nil + local pre, post, replace = getdisc(current) + if pre then + local n = n + while pre do + if seq[n][getchar(pre)] then + n = n + 1 + if n > s then + break + else + pre = getnext(pre) + end + else + notmatchpre[current] = true + break + end + end + if n <= s then + notmatchpre[current] = true + end + else + notmatchpre[current] = true + end + if replace then + -- so far we never entered this branch + while replace do + if seq[n][getchar(replace)] then + n = n + 1 + if n > s then + break + else + replace = getnext(replace) + end + else + notmatchreplace[current] = true + if notmatchpre[current] then + goto next + else + break + end + end + end + else + -- notmatchreplace[current] = true -- not according to Kai + end + current = getnext(current) + elseif id == glue_code then + local sn = seq[n] + if (sn[32] and spaces[current]) or sn[0xFFFC] then + n = n + 1 + -- current = getnext(current) + current = nxt + else + goto next + end + elseif seq[n][0xFFFC] then + n = n + 1 + -- current = getnext(current) + current = nxt + else + goto next + end + else + goto next + end + end + else + goto next + end + end + + if trace_contexts then + chaintrac(head,start,dataset,sequence,rlmode,skipped and skiphash,ck,true,discseen,sweepnode) + end + if discseen or sweepnode then + head, start, done = 
chaindisk(head,start,dataset,sequence,rlmode,skipped and skiphash,ck) + else + head, start, done = chainrun(head,start,last,dataset,sequence,rlmode,skipped and skiphash,ck) + end + if done then + break + -- else + -- next context + end + ::next:: + -- if trace_chains then + -- chaintrac(head,start,dataset,sequence,rlmode,skipped and skiphash,ck,false,discseen,sweepnode) + -- end + end + if discseen then + notmatchpre = { } + notmatchpost = { } + notmatchreplace = { } + -- notmatchpre = { a = 1, b = 1 } notmatchpre .a = nil notmatchpre .b = nil + -- notmatchpost = { a = 1, b = 1 } notmatchpost .a = nil notmatchpost .b = nil + -- notmatchreplace = { a = 1, b = 1 } notmatchreplace.a = nil notmatchreplace.b = nil + end + return head, start, done +end + +handlers.gsub_context = handle_contextchain +handlers.gsub_contextchain = handle_contextchain +handlers.gsub_reversecontextchain = handle_contextchain +handlers.gpos_contextchain = handle_contextchain +handlers.gpos_context = handle_contextchain + +-- this needs testing + +local function chained_contextchain(head,start,stop,dataset,sequence,currentlookup,rlmode,skiphash) + local steps = currentlookup.steps + local nofsteps = currentlookup.nofsteps + if nofsteps > 1 then + reportmoresteps(dataset,sequence) + end + -- probably wrong + local l = steps[1].coverage[getchar(start)] + if l then + return handle_contextchain(head,start,dataset,sequence,l,rlmode,skiphash) + else + return head, start, false + end +end + +chainprocs.gsub_context = chained_contextchain +chainprocs.gsub_contextchain = chained_contextchain +chainprocs.gsub_reversecontextchain = chained_contextchain +chainprocs.gpos_contextchain = chained_contextchain +chainprocs.gpos_context = chained_contextchain + +------------------------------ + +-- experiment (needs no handler in font-otc so not now): +-- +-- function otf.registerchainproc(name,f) +-- -- chainprocs[name] = f +-- chainprocs[name] = function(head,start,stop,dataset,sequence,currentlookup,rlmode,skiphash) +-- local done = currentlookup.nofsteps > 0 +-- if not done then +-- reportzerosteps(dataset,sequence) +-- else +-- head, start, done = f(head,start,stop,dataset,sequence,currentlookup,rlmode,skiphash) +-- if not head or not start then +-- reportbadsteps(dataset,sequence) +-- end +-- end +-- return head, start, done +-- end +-- end + +local missing = setmetatableindex("table") +local logwarning = report_process +local resolved = { } -- we only resolve a font,script,language pair once + +local function logprocess(...) + if trace_steps then + registermessage(...) + if trace_steps == "silent" then + return + end + end + report_process(...) 
+end + +-- todo: pass all these 'locals' in a table + +local sequencelists = setmetatableindex(function(t,font) + local sequences = fontdata[font].resources.sequences + if not sequences or not next(sequences) then + sequences = false + end + t[font] = sequences + return sequences +end) + +-- fonts.hashes.sequences = sequencelists + +do -- overcome local limit + + local autofeatures = fonts.analyzers.features + local featuretypes = otf.tables.featuretypes + local defaultscript = otf.features.checkeddefaultscript + local defaultlanguage = otf.features.checkeddefaultlanguage + + local wildcard = "*" + local default = "dflt" + + local function initialize(sequence,script,language,enabled,autoscript,autolanguage) + local features = sequence.features + if features then + local order = sequence.order + if order then + local featuretype = featuretypes[sequence.type or "unknown"] + for i=1,#order do + local kind = order[i] + local valid = enabled[kind] + if valid then + local scripts = features[kind] + local languages = scripts and ( + scripts[script] or + scripts[wildcard] or + (autoscript and defaultscript(featuretype,autoscript,scripts)) + ) + local enabled = languages and ( + languages[language] or + languages[wildcard] or + (autolanguage and defaultlanguage(featuretype,autolanguage,languages)) + ) + if enabled then + return { valid, autofeatures[kind] or false, sequence, kind } + end + end + end + else + -- can't happen + end + end + return false + end + + function otf.dataset(tfmdata,font) -- generic variant, overloaded in context + local shared = tfmdata.shared + local properties = tfmdata.properties + local language = properties.language or "dflt" + local script = properties.script or "dflt" + local enabled = shared.features + local autoscript = enabled and enabled.autoscript + local autolanguage = enabled and enabled.autolanguage + local res = resolved[font] + if not res then + res = { } + resolved[font] = res + end + local rs = res[script] + if not rs then + rs = { } + res[script] = rs + end + local rl = rs[language] + if not rl then + rl = { + -- indexed but we can also add specific data by key + } + rs[language] = rl + local sequences = tfmdata.resources.sequences + if sequences then + for s=1,#sequences do + local v = enabled and initialize(sequences[s],script,language,enabled,autoscript,autolanguage) + if v then + rl[#rl+1] = v + end + end + end + end + return rl + end + +end + +-- Functions like kernrun, comprun etc evolved over time and in the end look rather +-- complex. It's a bit of a compromis between extensive copying and creating subruns. +-- The logic has been improved a lot by Kai and Ivo who use complex fonts which +-- really helped to identify border cases on the one hand and get insight in the diverse +-- ways fonts implement features (not always that consistent and efficient). At the same +-- time I tried to keep the code relatively efficient so that the overhead in runtime +-- stays acceptable. + +local function report_disc(what,n) + report_run("%s: %s > %s",what,n,languages.serializediscretionary(n)) +end + +local function kernrun(disc,k_run,font,attr,...) 
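+    -- In short: the kern handler k_run is applied to the pre, post and replace
+    -- texts on their own ("injections") and then, with prev and next temporarily
+    -- linked in, to the boundaries ("preinjections", "postinjections",
+    -- "replaceinjections" and, when there is no replace text, "emptyinjections").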
+ -- + -- we catch <font 1><disc font 2> + -- + if trace_kernruns then + report_disc("kern",disc) + end + -- + local prev, next = getboth(disc) + -- + local nextstart = next + local done = false + -- + local pre, post, replace, pretail, posttail, replacetail = getdisc(disc,true) + -- + local prevmarks = prev + -- + -- can be optional, because why on earth do we get a disc after a mark (okay, maybe when a ccmp + -- has happened but then it should be in the disc so basically this test indicates an error) + -- + while prevmarks do + local char = ischar(prevmarks,font) + if char and marks[char] then + prevmarks = getprev(prevmarks) + else + break + end + end + -- + if prev and not ischar(prev,font) then -- and (pre or replace) + prev = false + end + if next and not ischar(next,font) then -- and (post or replace) + next = false + end + -- + -- we need to get rid of this nest mess some day .. has to be done otherwise + -- + if pre then + if k_run(pre,"injections",nil,font,attr,...) then + done = true + end + if prev then + setlink(prev,pre) + if k_run(prevmarks,"preinjections",pre,font,attr,...) then -- or prev? + done = true + end + setprev(pre) + setlink(prev,disc) + end + end + -- + if post then + if k_run(post,"injections",nil,font,attr,...) then + done = true + end + if next then + setlink(posttail,next) + if k_run(posttail,"postinjections",next,font,attr,...) then + done = true + end + setnext(posttail) + setlink(disc,next) + end + end + -- + if replace then + if k_run(replace,"injections",nil,font,attr,...) then + done = true + end + if prev then + setlink(prev,replace) + if k_run(prevmarks,"replaceinjections",replace,font,attr,...) then -- getnext(replace)) + done = true + end + setprev(replace) + setlink(prev,disc) + end + if next then + setlink(replacetail,next) + if k_run(replacetail,"replaceinjections",next,font,attr,...) then + done = true + end + setnext(replacetail) + setlink(disc,next) + end + elseif prev and next then + setlink(prev,next) + if k_run(prevmarks,"emptyinjections",next,font,attr,...) then + done = true + end + setlink(prev,disc,next) + end + if done and trace_testruns then + report_disc("done",disc) + end + return nextstart, done +end + +-- fonts like ebgaramond do ligatures this way (less efficient than e.g. dejavu which +-- will do the testrun variant) + +local function comprun(disc,c_run,...) -- vararg faster than the whole list + if trace_compruns then + report_disc("comp",disc) + end + -- + local pre, post, replace = getdisc(disc) + local renewed = false + -- + if pre then + sweepnode = disc + sweeptype = "pre" -- in alternative code preinjections is used (also used then for properties, saves a variable) + local new, done = c_run(pre,...) + if done then + pre = new + renewed = true + end + end + -- + if post then + sweepnode = disc + sweeptype = "post" + local new, done = c_run(post,...) + if done then + post = new + renewed = true + end + end + -- + if replace then + sweepnode = disc + sweeptype = "replace" + local new, done = c_run(replace,...) + if done then + replace = new + renewed = true + end + end + -- + sweepnode = nil + sweeptype = nil + if renewed then + if trace_testruns then + report_disc("done",disc) + end + setdisc(disc,pre,post,replace) + end + -- + return getnext(disc), renewed +end + +-- if we can hyphenate in a lig then unlikely a lig so we +-- could have a option here to ignore lig + +local function testrun(disc,t_run,c_run,...) 
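+    -- testrun first lets t_run measure how far a (ligature) match would reach past this
+    -- discretionary; when it reaches beyond it, the post and replace texts are extended
+    -- with (a copy of) the following glyphs, flattening any discretionaries on the way,
+    -- after which c_run does the real substitution on pre, post and replace, as in comprun.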
+ if trace_testruns then + report_disc("test",disc) + end + local prev, next = getboth(disc) + if not next then + -- weird discretionary + return + end + local pre, post, replace, pretail, posttail, replacetail = getdisc(disc,true) + local renewed = false + if post or replace then -- and prev then -- hm, we can start with a disc + if post then + setlink(posttail,next) + else + post = next + end + if replace then + setlink(replacetail,next) + else + replace = next + end + local d_post = t_run(post,next,...) + local d_replace = t_run(replace,next,...) + if d_post > 0 or d_replace > 0 then + local d = d_replace > d_post and d_replace or d_post + local head = getnext(disc) -- is: next + local tail = head + for i=2,d do -- must start at 2 according to Kai + local nx = getnext(tail) + local id = getid(nx) + if id == disc_code then + head, tail = flattendisk(head,nx) + elseif id == glyph_code then + tail = nx + else + -- we can have overrun into a glue + break + end + end + next = getnext(tail) + setnext(tail) + setprev(head) + local new = copy_node_list(head) + if posttail then + setlink(posttail,head) + else + post = head + end + if replacetail then + setlink(replacetail,new) + else + replace = new + end + else + -- we stay inside the disc + if posttail then + setnext(posttail) + else + post = nil + end + if replacetail then + setnext(replacetail) + else + replace = nil + end + end + setlink(disc,next) + -- pre, post, replace, pretail, posttail, replacetail = getdisc(disc,true) + end + -- + -- like comprun + -- + if trace_testruns then + report_disc("more",disc) + end + -- + if pre then + sweepnode = disc + sweeptype = "pre" + local new, ok = c_run(pre,...) + if ok then + pre = new + renewed = true + end + end + -- + if post then + sweepnode = disc + sweeptype = "post" + local new, ok = c_run(post,...) + if ok then + post = new + renewed = true + end + end + -- + if replace then + sweepnode = disc + sweeptype = "replace" + local new, ok = c_run(replace,...) + if ok then + replace = new + renewed = true + end + end + -- + sweepnode = nil + sweeptype = nil + if renewed then + setdisc(disc,pre,post,replace) + if trace_testruns then + report_disc("done",disc) + end + end + -- next can have changed (copied list) + return getnext(disc), renewed +end + +-- 1{2{\oldstyle\discretionary{3}{4}{5}}6}7\par +-- 1{2\discretionary{3{\oldstyle3}}{{\oldstyle4}4}{5{\oldstyle5}5}6}7\par + +local nesting = 0 + +local function c_run_single(head,font,attr,lookupcache,step,dataset,sequence,rlmode,skiphash,handler) + local done = false + local sweep = sweephead[head] + local start + if sweep then + start = sweep + -- sweephead[head] = nil + sweephead[head] = false + else + start = head + end + while start do + local char, id = ischar(start,font) + if char then + local a -- happens often so no assignment is faster + if attr then + a = getglyphdata(start) + end + if not a or (a == attr) then + local lookupmatch = lookupcache[char] + if lookupmatch then + local ok + head, start, ok = handler(head,start,dataset,sequence,lookupmatch,rlmode,skiphash,step) + if ok then + done = true + end + end + if start then + start = getnext(start) + end + else + -- go on can be a mixed one + start = getnext(start) + end + elseif char == false then + return head, done + elseif sweep then + -- else we loose the rest + return head, done + else + -- in disc component + start = getnext(start) + end + end + return head, done +end + +-- only replace? 
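+--
+-- The t_run_* variants below don't change anything yet: they follow the replace texts of
+-- the discretionaries after the current glyph and return how many glyphs a ligature lookup
+-- would consume past the stop node (0 when nothing applies). testrun then uses that count
+-- to decide how much to pull into the discretionary.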
+ +local function t_run_single(start,stop,font,attr,lookupcache) + local lastd = nil + while start ~= stop do + local char = ischar(start,font) + if char then + local a -- happens often so no assignment is faster + if attr then + a = getglyphdata(start) + end + local startnext = getnext(start) + if not a or (a == attr) then + local lookupmatch = lookupcache[char] + if lookupmatch then -- hm, hyphens can match (tlig) so we need to really check + -- if we need more than ligatures we can outline the code and use functions + local s = startnext + local ss = nil + local sstop = s == stop + if not s then + s = ss + ss = nil + end + -- a bit weird: why multiple ... anyway we can't have a disc in a disc + -- how about post ... we can probably merge this into the while + while getid(s) == disc_code do + ss = getnext(s) + s = getreplace(s) + if not s then + s = ss + ss = nil + end + end + local l = nil + local d = 0 + while s do + local char = ischar(s,font) + if char then + local lg = lookupmatch[char] + if lg then + if sstop then + d = 1 + elseif d > 0 then + d = d + 1 + end + l = lg + s = getnext(s) + sstop = s == stop + if not s then + s = ss + ss = nil + end + while getid(s) == disc_code do + ss = getnext(s) + s = getreplace(s) + if not s then + s = ss + ss = nil + end + end + lookupmatch = lg + else + break + end + else + break + end + end + if l and l.ligature then -- so we test for ligature + lastd = d + end + -- why not: if not l then break elseif l.ligature then return d end + else + -- why not: break + -- no match (yet) + end + else + -- go on can be a mixed one + -- why not: break + end + if lastd then + return lastd + end + start = startnext + else + break + end + end + return 0 +end + +local function k_run_single(sub,injection,last,font,attr,lookupcache,step,dataset,sequence,rlmode,skiphash,handler) + local a -- happens often so no assignment is faster + if attr then + a = getglyphdata(sub) + end + if not a or (a == attr) then + for n in nextnode, sub do -- only gpos + if n == last then + break + end + local char = ischar(n,font) + if char then + local lookupmatch = lookupcache[char] + if lookupmatch then + local h, d, ok = handler(sub,n,dataset,sequence,lookupmatch,rlmode,skiphash,step,injection) + if ok then + return true + end + end + end + end + end +end + +local function c_run_multiple(head,font,attr,steps,nofsteps,dataset,sequence,rlmode,skiphash,handler) + local done = false + local sweep = sweephead[head] + local start + if sweep then + start = sweep + -- sweephead[head] = nil + sweephead[head] = false + else + start = head + end + while start do + local char = ischar(start,font) + if char then + local a -- happens often so no assignment is faster + if attr then + a = getglyphdata(start) + end + if not a or (a == attr) then + for i=1,nofsteps do + local step = steps[i] + local lookupcache = step.coverage + local lookupmatch = lookupcache[char] + if lookupmatch then + -- we could move all code inline but that makes things even more unreadable + local ok + head, start, ok = handler(head,start,dataset,sequence,lookupmatch,rlmode,skiphash,step) + if ok then + done = true + break + elseif not start then + -- don't ask why ... 
shouldn't happen + break + end + end + end + if start then + start = getnext(start) + end + else + -- go on can be a mixed one + start = getnext(start) + end + elseif char == false then + -- whatever glyph + return head, done + elseif sweep then + -- else we loose the rest + return head, done + else + -- in disc component + start = getnext(start) + end + end + return head, done +end + +local function t_run_multiple(start,stop,font,attr,steps,nofsteps) + local lastd = nil + while start ~= stop do + local char = ischar(start,font) + if char then + local a -- happens often so no assignment is faster + if attr then + a = getglyphdata(start) + end + local startnext = getnext(start) + if not a or (a == attr) then + for i=1,nofsteps do + local step = steps[i] + local lookupcache = step.coverage + local lookupmatch = lookupcache[char] + if lookupmatch then + -- if we need more than ligatures we can outline the code and use functions + local s = startnext + local ss = nil + local sstop = s == stop + if not s then + s = ss + ss = nil + end + while getid(s) == disc_code do + ss = getnext(s) + s = getreplace(s) + if not s then + s = ss + ss = nil + end + end + local l = nil + local d = 0 + while s do + local char = ischar(s) + if char then + local lg = lookupmatch[char] + if lg then + if sstop then + d = 1 + elseif d > 0 then + d = d + 1 + end + l = lg + s = getnext(s) + sstop = s == stop + if not s then + s = ss + ss = nil + end + while getid(s) == disc_code do + ss = getnext(s) + s = getreplace(s) + if not s then + s = ss + ss = nil + end + end + lookupmatch = lg + else + break + end + else + break + end + end + if l and l.ligature then + lastd = d + end + end + end + else + -- go on can be a mixed one + end + if lastd then + return lastd + end + start = startnext + else + break + end + end + return 0 +end + +local function k_run_multiple(sub,injection,last,font,attr,steps,nofsteps,dataset,sequence,rlmode,skiphash,handler) + local a -- happens often so no assignment is faster + if attr then + a = getglyphdata(sub) + end + if not a or (a == attr) then + for n in nextnode, sub do -- only gpos + if n == last then + break + end + local char = ischar(n) + if char then + for i=1,nofsteps do + local step = steps[i] + local lookupcache = step.coverage + local lookupmatch = lookupcache[char] + if lookupmatch then + local h, d, ok = handler(sub,n,dataset,sequence,lookupmatch,rlmode,skiphash,step,injection) -- sub was head + if ok then + return true + end + end + end + end + end + end +end + +local txtdirstate, pardirstate do -- this might change (no need for nxt in pardirstate) + + local getdirection = nuts.getdirection + + txtdirstate = function(start,stack,top,rlparmode) + local dir, pop = getdirection(start) + if pop then + if top == 1 then + return 0, rlparmode + else + top = top - 1 + if stack[top] == righttoleft_code then + return top, -1 + else + return top, 1 + end + end + elseif dir == lefttoright_code then + top = top + 1 + stack[top] = lefttoright_code + return top, 1 + elseif dir == righttoleft_code then + top = top + 1 + stack[top] = righttoleft_code + return top, -1 + else + return top, rlparmode + end + end + + pardirstate = function(start) + local dir = getdirection(start) + if dir == lefttoright_code then + return 1, 1 + elseif dir == righttoleft_code then + return -1, -1 + else + return 0, 0 + end + end + +end + +-- These are non public helpers that can change without notice! 
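+--
+-- In short: pardirstate(start) maps the direction of a par node onto 1 (left-to-right),
+-- -1 (right-to-left) or 0 (unset), while txtdirstate pushes and pops explicit dir nodes on
+-- a small stack and returns the new stack top plus the rlmode that applies from that point
+-- on.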
+ +otf.helpers = otf.helpers or { } +otf.helpers.txtdirstate = txtdirstate +otf.helpers.pardirstate = pardirstate + +-- This is the main loop. We run over the node list dealing with a specific font. The +-- attribute is a context specific thing. We could work on sub start-stop ranges instead +-- but I wonder if there is that much speed gain (experiments showed that it made not +-- much sense) and we need to keep track of directions anyway. Also at some point I +-- want to play with font interactions and then we do need the full sweeps. Apart from +-- optimizations the principles of processing the features hasn't changed much since +-- the beginning. + +do + + -- This is a measurable experimental speedup (only with hyphenated text and multiple + -- fonts per processor call), especially for fonts with lots of contextual lookups. + + local fastdisc = true + local testdics = false + + directives.register("otf.fastdisc",function(v) fastdisc = v end) -- normally enabled + + -- using a merged combined hash as first test saves some 30% on ebgaramond and + -- about 15% on arabtype .. then moving the a test also saves a bit (even when + -- often a is not set at all so that one is a bit debatable + + local otfdataset = nil -- todo: make an installer + + local getfastdisc = { __index = function(t,k) + local v = usesfont(k,currentfont) + t[k] = v + return v + end } + + local getfastspace = { __index = function(t,k) + -- we don't pass the id so that one can overload isspace + local v = isspace(k,threshold) or false + t[k] = v + return v + end } + + function otf.featuresprocessor(head,font,attr,direction,n) + + local sequences = sequencelists[font] -- temp hack + + nesting = nesting + 1 + + if nesting == 1 then + currentfont = font + tfmdata = fontdata[font] + descriptions = tfmdata.descriptions -- only needed in gref so we could pass node there instead + characters = tfmdata.characters -- but this branch is not entered that often anyway + local resources = tfmdata.resources + marks = resources.marks + classes = resources.classes + threshold, + factor = getthreshold(font) + checkmarks = tfmdata.properties.checkmarks + + if not otfdataset then + otfdataset = otf.dataset + end + + discs = fastdisc and n and n > 1 and setmetatable({},getfastdisc) -- maybe inline + spaces = setmetatable({},getfastspace) + + elseif currentfont ~= font then + + report_warning("nested call with a different font, level %s, quitting",nesting) + nesting = nesting - 1 + return head, false + + end + + -- some 10% faster when no dynamics but hardly measureable on real runs .. but: it only + -- works when we have no other dynamics as otherwise the zero run will be applied to the + -- whole stream for which we then need to pass another variable which we won't + + -- if attr == 0 then + -- attr = false + -- end + + if trace_steps then + checkstep(head) + end + + local initialrl = 0 + + if getid(head) == par_code and start_of_par(head) then + initialrl = pardirstate(head) + elseif direction == righttoleft_code then + initialrl = -1 + end + + -- local done = false + local datasets = otfdataset(tfmdata,font,attr) + local dirstack = { nil } -- could move outside function but we can have local runs + sweephead = { } + -- sweephead = { a = 1, b = 1 } sweephead.a = nil sweephead.b = nil + + -- Keeping track of the headnode is needed for devanagari. (I generalized it a bit + -- so that multiple cases are also covered.) We could prepend a temp node. 
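+        -- Each entry in datasets comes from otf.dataset (see initialize above) and is a
+        -- tuple { value, state attribute or false, sequence, kind }; in the loop below we
+        -- mainly need the attribute (dataset[2]) for filtering on the analyzer state and
+        -- the sequence (dataset[3]) that carries the steps, their coverage hashes and the
+        -- skip hash.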
+ + -- We don't goto the next node when a disc node is created so that we can then treat + -- the pre, post and replace. It's a bit of a hack but works out ok for most cases. + + for s=1,#datasets do + local dataset = datasets[s] + local attribute = dataset[2] + local sequence = dataset[3] -- sequences[s] -- also dataset[5] + local rlparmode = initialrl + local topstack = 0 + local typ = sequence.type + local gpossing = typ == "gpos_single" or typ == "gpos_pair" -- store in dataset + local forcetestrun = typ == "gsub_ligature" -- testrun is only for ligatures + local handler = handlers[typ] -- store in dataset + local steps = sequence.steps + local nofsteps = sequence.nofsteps + local skiphash = sequence.skiphash + + if not steps then + -- This permits injection, watch the different arguments. Watch out, the arguments passed + -- are not frozen as we might extend or change this. Is this used at all apart from some + -- experiments? + local h, ok = handler(head,dataset,sequence,initialrl,font,attr) -- less arguments now + -- if ok then + -- done = true + -- end + if h and h ~= head then + head = h + end + elseif typ == "gsub_reversecontextchain" then + -- + -- This might need a check: if we have #before or #after > 0 then we might need to reverse + -- the before and after lists in the loader. But first I need to see a font that uses multiple + -- matches. + -- + local start = find_node_tail(head) + local rlmode = 0 -- how important is this .. do we need to check for dir? + local merged = steps.merged + while start do + local char = ischar(start,font) + if char then + local m = merged[char] + if m then + local a -- happens often so no assignment is faster + if attr then + a = getglyphdata(start) + end + if not a or (a == attr) then + for i=m[1],m[2] do + local step = steps[i] + -- for i=1,#m do + -- local step = m[i] + local lookupcache = step.coverage + local lookupmatch = lookupcache[char] + if lookupmatch then + local ok + head, start, ok = handler(head,start,dataset,sequence,lookupmatch,rlmode,skiphash,step) + if ok then + -- done = true + break + end + end + end + if start then + start = getprev(start) + end + else + start = getprev(start) + end + else + start = getprev(start) + end + else + start = getprev(start) + end + end + else + local start = head + local rlmode = initialrl + if nofsteps == 1 then -- happens often + local step = steps[1] + local lookupcache = step.coverage + while start do + -- local char, id = ischar(start,font) + local nxt, char, id = isnextchar(start,font) + if char then + if skiphash and skiphash[char] then -- we never needed it here but let's try + -- start = getnext(start) + start = nxt + else + local lookupmatch = lookupcache[char] + if lookupmatch then + local a -- happens often so no assignment is faster + if attr then + if getglyphdata(start) == attr and (not attribute or getstate(start,attribute)) then + a = true + end + elseif not attribute or getstate(start,attribute) then + a = true + end + if a then + local ok, df + head, start, ok, df = handler(head,start,dataset,sequence,lookupmatch,rlmode,skiphash,step) + -- if ok then + -- done = true + -- end + if df then + -- print("restart 1",typ) + elseif start then + start = getnext(start) -- can be a new start + end + else + -- start = getnext(start) + start = nxt + end + else + -- start = getnext(start) + start = nxt + end + end + elseif char == false or id == glue_code then + -- a different font|state or glue (happens often) + -- start = getnext(start) + start = nxt + elseif id == disc_code then + if not 
discs or discs[start] == true then + local ok + if gpossing then + start, ok = kernrun(start,k_run_single, font,attr,lookupcache,step,dataset,sequence,rlmode,skiphash,handler) + elseif forcetestrun then + start, ok = testrun(start,t_run_single,c_run_single,font,attr,lookupcache,step,dataset,sequence,rlmode,skiphash,handler) + else + start, ok = comprun(start,c_run_single, font,attr,lookupcache,step,dataset,sequence,rlmode,skiphash,handler) + end + -- if ok then + -- done = true + -- end + else + -- start = getnext(start) + start = nxt + end + elseif id == math_code then + start = getnext(end_of_math(start)) + elseif id == dir_code then + topstack, rlmode = txtdirstate(start,dirstack,topstack,rlparmode) + -- start = getnext(start) + start = nxt + -- elseif id == par_code and start_of_par(start) then + -- rlparmode, rlmode = pardirstate(start) + -- -- start = getnext(start) + -- start = nxt + else + -- start = getnext(start) + start = nxt + end + end + else + local merged = steps.merged + while start do + -- local char, id = ischar(start,font) + local nxt, char, id = isnextchar(start,font) + if char then + if skiphash and skiphash[char] then -- we never needed it here but let's try + -- start = getnext(start) + start = nxt + else + local m = merged[char] + if m then + local a -- happens often so no assignment is faster + if attr then + if getglyphdata(start) == attr and (not attribute or getstate(start,attribute)) then + a = true + end + elseif not attribute or getstate(start,attribute) then + a = true + end + if a then + local ok, df + for i=m[1],m[2] do + local step = steps[i] + -- for i=1,#m do + -- local step = m[i] + local lookupcache = step.coverage + local lookupmatch = lookupcache[char] + if lookupmatch then + -- we could move all code inline but that makes things even more unreadable +-- local ok, df + head, start, ok, df = handler(head,start,dataset,sequence,lookupmatch,rlmode,skiphash,step) + if df then + break + elseif ok then + -- done = true + break + elseif not start then + -- don't ask why ... shouldn't happen + break + end + end + end + if df then + -- print("restart 2",typ) + elseif start then + start = getnext(start) -- can be a new next + end + else + -- start = getnext(start) + start = nxt + end + else + -- start = getnext(start) + start = nxt + end + end + elseif char == false or id == glue_code then + -- a different font|state or glue (happens often) + -- start = getnext(start) + start = nxt + elseif id == disc_code then + if not discs or discs[start] == true then + local ok + if gpossing then + start, ok = kernrun(start,k_run_multiple, font,attr,steps,nofsteps,dataset,sequence,rlmode,skiphash,handler) + elseif forcetestrun then + start, ok = testrun(start,t_run_multiple,c_run_multiple,font,attr,steps,nofsteps,dataset,sequence,rlmode,skiphash,handler) + else + start, ok = comprun(start,c_run_multiple, font,attr,steps,nofsteps,dataset,sequence,rlmode,skiphash,handler) + end + -- if ok then + -- done = true + -- end + else + -- start = getnext(start) + start = nxt + end + elseif id == math_code then + start = getnext(end_of_math(start)) + elseif id == dir_code then + topstack, rlmode = txtdirstate(start,dirstack,topstack,rlparmode) + -- start = getnext(start) + start = nxt + -- elseif id == par_code and start_of_par(start) then + -- rlparmode, rlmode = pardirstate(start) + -- start = getnext(start) + else + -- start = getnext(start) + start = nxt + end + end + end + end + + if trace_steps then -- ? 
+ registerstep(head) + end + + end + + nesting = nesting - 1 + + -- return head, done + return head + end + + -- This is not an official helper and used for tracing experiments. It can be changed as I like + -- at any moment. At some point it might be used in a module that can help font development. + + function otf.datasetpositionprocessor(head,font,direction,dataset) + + currentfont = font + tfmdata = fontdata[font] + descriptions = tfmdata.descriptions -- only needed in gref so we could pass node there instead + characters = tfmdata.characters -- but this branch is not entered that often anyway + local resources = tfmdata.resources + marks = resources.marks + classes = resources.classes + threshold, + factor = getthreshold(font) + checkmarks = tfmdata.properties.checkmarks + + if type(dataset) == "number" then + dataset = otfdataset(tfmdata,font,0)[dataset] + end + + local sequence = dataset[3] -- sequences[s] -- also dataset[5] + local typ = sequence.type + -- local gpossing = typ == "gpos_single" or typ == "gpos_pair" -- store in dataset + + -- gpos_contextchain gpos_context + + -- if not gpossing then + -- return head, false + -- end + + local handler = handlers[typ] -- store in dataset + local steps = sequence.steps + local nofsteps = sequence.nofsteps + + local done = false + local dirstack = { nil } -- could move outside function but we can have local runs (maybe a few more nils) + local start = head + local initialrl = (direction == righttoleft_code) and -1 or 0 + local rlmode = initialrl + local rlparmode = initialrl + local topstack = 0 + local merged = steps.merged + + -- local matches = false + local position = 0 + + while start do + -- local char, id = ischar(start,font) + local nxt, char, id = isnextchar(start,font) + if char then + position = position + 1 + local m = merged[char] + if m then + if skiphash and skiphash[char] then -- we never needed it here but let's try + -- start = getnext(start) + start = nxt + else + for i=m[1],m[2] do + local step = steps[i] + local lookupcache = step.coverage + local lookupmatch = lookupcache[char] + if lookupmatch then + local ok + head, start, ok = handler(head,start,dataset,sequence,lookupmatch,rlmode,skiphash,step) + if ok then + -- if matches then + -- matches[position] = i + -- else + -- matches = { [position] = i } + -- end + break + elseif not start then + break + end + end + end + if start then + start = getnext(start) -- start can be new + end + end + else + -- start = getnext(start) + start = nxt + end + elseif char == false or id == glue_code then + -- a different font|state or glue (happens often) + -- start = getnext(start) + start = nxt + elseif id == math_code then + start = getnext(end_of_math(start)) + elseif id == dir_code then + topstack, rlmode = txtdirstate(start,dirstack,topstack,rlparmode) + -- start = getnext(start) + start = nxt + -- elseif id == par_code and start_of_par(start) then + -- rlparmode, rlmode = pardirstate(start) + -- -- start = getnext(start) + -- start = nxt + else + -- start = getnext(start) + start = nxt + end + end + + return head + end + + -- end of experiment + +end + +-- so far + +local plugins = { } +otf.plugins = plugins + +local report = logs.reporter("fonts") + +function otf.registerplugin(name,f) + if type(name) == "string" and type(f) == "function" then + plugins[name] = { name, f } + report() + report("plugin %a has been loaded, please be aware of possible side effects",name) + report() + if logs.pushtarget then + logs.pushtarget("log") + end + report("Plugins are not officially 
supported unless stated otherwise. This is because") + report("they bypass the regular font handling and therefore some features in ConTeXt") + report("(especially those related to fonts) might not work as expected or might not work") + report("at all. Some plugins are for testing and development only and might change") + report("whenever we feel the need for it.") + report() + if logs.poptarget then + logs.poptarget() + end + end +end + +function otf.plugininitializer(tfmdata,value) + if type(value) == "string" then + tfmdata.shared.plugin = plugins[value] + end +end + +function otf.pluginprocessor(head,font,attr,direction) -- n + local s = fontdata[font].shared + local p = s and s.plugin + if p then + if trace_plugins then + report_process("applying plugin %a",p[1]) + end + return p[2](head,font,attr,direction) + else + return head, false + end +end + +function otf.featuresinitializer(tfmdata,value) + -- nothing done here any more +end + +registerotffeature { + name = "features", + description = "features", + default = true, + initializers = { + position = 1, + node = otf.featuresinitializer, + plug = otf.plugininitializer, + }, + processors = { + node = otf.featuresprocessor, + plug = otf.pluginprocessor, + } +} + +-- Moved here (up) a bit. This doesn't really belong in generic so it will +-- move to a context module some day. + +local function markinitializer(tfmdata,value) + local properties = tfmdata.properties + properties.checkmarks = value +end + +registerotffeature { + name = "checkmarks", + description = "check mark widths", + default = true, + initializers = { + node = markinitializer, + }, +} + +-- This can be used for extra handlers, but should be used with care! We implement one +-- here but some more can be found in the osd (script devanagary) file. Now watch out: +-- when a handler has steps, it is called as the other ones, but when we have no steps, +-- we use a different call: +-- +-- function(head,dataset,sequence,initialrl,font,attr) +-- return head, done +-- end +-- +-- Also see (!!). + +otf.handlers = handlers + +if context then + return +else + -- todo: move the following code someplace else +end + +local setspacekerns = nodes.injections.setspacekerns if not setspacekerns then os.exit() end + +local tag = "kern" + +-- if fontfeatures then + +-- function handlers.trigger_space_kerns(head,dataset,sequence,initialrl,font,attr) +-- local features = fontfeatures[font] +-- local enabled = features and features.spacekern and features[tag] +-- if enabled then +-- setspacekerns(font,sequence) +-- end +-- return head, enabled +-- end + +-- else -- generic (no hashes) + + function handlers.trigger_space_kerns(head,dataset,sequence,initialrl,font,attr) + local shared = fontdata[font].shared + local features = shared and shared.features + local enabled = features and features.spacekern and features[tag] + if enabled then + setspacekerns(font,sequence) + end + return head, enabled + end + +-- end + +-- There are fonts out there that change the space but we don't do that kind of +-- things in TeX. 
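+--
+-- The trigger_space_kerns handler above is an example of the stepless calling convention
+-- mentioned earlier: it only gets (head,dataset,sequence,initialrl,font,attr) and merely
+-- switches on the space kern injection. A plugin hooks in at a coarser level and takes
+-- over the whole pass for a font; registering one looks roughly like this (the name is
+-- just an illustration):
+--
+-- otf.registerplugin("myshaper", function(head,font,attr,direction)
+--     -- inspect or reshape the node list for this font here
+--     return head, false
+-- end)
+--
+-- After that the plugin name, passed as the value of the features key in plug mode, makes
+-- pluginprocessor run instead of the regular featuresprocessor (see plugininitializer).
+--
+-- The next two functions check whether a font really kerns against the space glyph (slot
+-- 32) in its gpos pair steps; only then is setting up the spacekern feature worth the
+-- effort.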
+ +local function hasspacekerns(data) + local resources = data.resources + local sequences = resources.sequences + local validgpos = resources.features.gpos + if validgpos and sequences then + for i=1,#sequences do + local sequence = sequences[i] + local steps = sequence.steps + if steps and sequence.features[tag] then + local kind = sequence.type + if kind == "gpos_pair" or kind == "gpos_single" then + for i=1,#steps do + local step = steps[i] + local coverage = step.coverage + local rules = step.rules + if rules then + -- not now: analyze (simple) rules + elseif not coverage then + -- nothing to do + elseif kind == "gpos_single" then + -- maybe a message that we ignore + elseif kind == "gpos_pair" then + local format = step.format + if format == "move" or format == "kern" then + local kerns = coverage[32] + if kerns then + return true + end + for k, v in next, coverage do + if v[32] then + return true + end + end + elseif format == "pair" then + local kerns = coverage[32] + if kerns then + for k, v in next, kerns do + local one = v[1] + if one and one ~= true then + return true + end + end + end + for k, v in next, coverage do + local kern = v[32] + if kern then + local one = kern[1] + if one and one ~= true then + return true + end + end + end + end + end + end + end + end + end + end + return false +end + +otf.readers.registerextender { + name = "spacekerns", + action = function(data) + data.properties.hasspacekerns = hasspacekerns(data) + end +} + +local function spaceinitializer(tfmdata,value) -- attr + local resources = tfmdata.resources + local spacekerns = resources and resources.spacekerns + if value and spacekerns == nil then + local rawdata = tfmdata.shared and tfmdata.shared.rawdata + local properties = rawdata.properties + if properties and properties.hasspacekerns then + local sequences = resources.sequences + local validgpos = resources.features.gpos + if validgpos and sequences then + local left = { } + local right = { } + local last = 0 + local feat = nil + for i=1,#sequences do + local sequence = sequences[i] + local steps = sequence.steps + if steps then + -- we don't support space kerns in other features + local kern = sequence.features[tag] + if kern then + local kind = sequence.type + if kind == "gpos_pair" or kind == "gpos_single" then + if feat then + for script, languages in next, kern do + local f = feat[script] + if f then + for l in next, languages do + f[l] = true + end + else + feat[script] = languages + end + end + else + feat = kern + end + for i=1,#steps do + local step = steps[i] + local coverage = step.coverage + local rules = step.rules + if rules then + -- not now: analyze (simple) rules + elseif not coverage then + -- nothing to do + elseif kind == "gpos_single" then + -- makes no sense in TeX + elseif kind == "gpos_pair" then + local format = step.format + if format == "move" or format == "kern" then + local kerns = coverage[32] + if kerns then + for k, v in next, kerns do + right[k] = v + end + end + for k, v in next, coverage do + local kern = v[32] + if kern then + left[k] = kern + end + end + elseif format == "pair" then + local kerns = coverage[32] + if kerns then + for k, v in next, kerns do + local one = v[1] + if one and one ~= true then + right[k] = one[3] + end + end + end + for k, v in next, coverage do + local kern = v[32] + if kern then + local one = kern[1] + if one and one ~= true then + left[k] = one[3] + end + end + end + end + end + end + last = i + end + else + -- no steps ... needed for old one ... 
we could use the basekerns + -- instead + end + end + end + left = next(left) and left or false + right = next(right) and right or false + if left or right then + spacekerns = { + left = left, + right = right, + } + if last > 0 then + local triggersequence = { + -- no steps, see (!!) + features = { [tag] = feat or { dflt = { dflt = true, } } }, + flags = noflags, + name = "trigger_space_kerns", + order = { tag }, + type = "trigger_space_kerns", + left = left, + right = right, + } + insert(sequences,last,triggersequence) + end + end + end + end + resources.spacekerns = spacekerns + end + return spacekerns +end + +registerotffeature { + name = "spacekern", + description = "space kern injection", + default = true, + initializers = { + node = spaceinitializer, + }, +} diff --git a/tex/context/base/mkxl/lpdf-ini.lmt b/tex/context/base/mkxl/lpdf-ini.lmt index 97aeaa358..59e728dfb 100644 --- a/tex/context/base/mkxl/lpdf-ini.lmt +++ b/tex/context/base/mkxl/lpdf-ini.lmt @@ -368,7 +368,7 @@ do -- if mv and mv.__lpdftype then if v.__lpdftype__ then -- if v == t then - -- report_objects("ignoring circular reference in dirctionary") + -- report_objects("ignoring circular reference in dictionary") -- r[i] = f_key_null(k) -- else r[i] = f_key_value(k,tostring(v)) diff --git a/tex/context/base/mkxl/node-nut.lmt b/tex/context/base/mkxl/node-nut.lmt index 38ac5b0aa..61c9a9739 100644 --- a/tex/context/base/mkxl/node-nut.lmt +++ b/tex/context/base/mkxl/node-nut.lmt @@ -131,6 +131,8 @@ local nuts = { is_nut = direct.is_direct, is_zero_glue = direct.is_zero_glue, ischar = direct.is_char, + isprevchar = direct.is_prev_char, + isnextchar = direct.is_next_char, isglyph = direct.is_glyph, kerning = direct.kerning, last_node = direct.last_node, @@ -224,6 +226,7 @@ local nuts = { writable_spec = direct.writable_spec, write = direct.write, append = direct.append, + has_glyph_option = direct.has_glyph_option, } nodes.nuts = nuts diff --git a/tex/context/base/mkxl/strc-flt.mklx b/tex/context/base/mkxl/strc-flt.mklx index d2a1551d9..2eb7ca6d7 100644 --- a/tex/context/base/mkxl/strc-flt.mklx +++ b/tex/context/base/mkxl/strc-flt.mklx @@ -1778,7 +1778,7 @@ \hsize\scratchdimen \fi \strc_floats_make_complete_caption}% - \orelse\if\empty\p_strc_floats_caption_align + \orelse\ifempty\p_strc_floats_caption_align \setbox\b_strc_floats_caption\vbox {\strc_floats_caption_set_align \hsize\captionhsize @@ -1935,7 +1935,7 @@ {\doifelseinset\v!right\floatcaptiondirectives {\strc_floats_flush_right_caption_hang} {\strc_floats_flush_left_caption_hang}}} - {\box\b_strc_floats_caption}}}} + {\box\b_strc_floats_caption}}}} % is this ok \def\strc_floats_build_box_high {\strc_floats_build_box_next{\strc_floats_between_stack\strc_floats_flush_caption_hang\vfill}} diff --git a/tex/context/base/mkxl/symb-emj.lmt b/tex/context/base/mkxl/symb-emj.lmt new file mode 100644 index 000000000..b043d835d --- /dev/null +++ b/tex/context/base/mkxl/symb-emj.lmt @@ -0,0 +1,76 @@ +if not modules then modules = { } end modules ['symb-emj'] = { + version = 1.001, + comment = "companion to symb-emj.mkiv", + author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", + copyright = "PRAGMA ADE / ConTeXt Development Team", + license = "see context related readme files" +} + +-- processors.hpack_filter does it all + +local symbols = fonts.symbols + +local resolvedemoji = characters.emoji.resolve +local processfeatures = fonts.handlers.otf.featuresprocessor +local injectspacing = nodes.injections.handler +local protectglyphs = nodes.handlers.protectglyphs +local tonodes = 
nodes.tonodes +local currentfont = font.current + +local nuts = nodes.nuts +local tonode = nuts.tonode +local tonut = nuts.tonut +local remove_node = nuts.remove +local isglyph = nuts.isglyph +local getnext = nuts.getnext + +local function removemodifiers(head) + local head = tonut(head) + local current = head + while current do + local char = isglyph(current) + if char and (char == 0x200D or (char >= 0x1F3FB and char <= 0x1F3FF)) then + head, current = remove_node(head,current,true) + else + current = getnext(current) + end + end + return tonode(head) +end + +-- fast enough, no need to memoize, maybe use attributes + +local function checkedemoji(name,id) + local str = resolvedemoji(name) + if str then + if not id then + id = currentfont() + end + local head = tonodes(str,id,nil,nil,true) -- use current attributes + head = processfeatures(head,id,false) + if head then + head = injectspacing(head) + protectglyphs(head) + return removemodifiers(head) + end + end +end + +symbols.emoji = { + resolved = resolvedemoji, + checked = checkedemoji, +} + +interfaces.implement { + name = "resolvedemoji", + actions = { resolvedemoji, context.escaped }, + arguments = "string", +} + +interfaces.implement { + name = "checkedemoji", + actions = { checkedemoji, context }, + arguments = "string", +} + + diff --git a/tex/context/base/mkxl/symb-emj.mkxl b/tex/context/base/mkxl/symb-emj.mkxl index 96c4c0859..e64646a52 100644 --- a/tex/context/base/mkxl/symb-emj.mkxl +++ b/tex/context/base/mkxl/symb-emj.mkxl @@ -13,7 +13,7 @@ \writestatus{loading}{ConTeXt Symbol Libraries / Emoji} -\registerctxluafile{symb-emj}{} +\registerctxluafile{symb-emj}{autosuffix} \unprotect diff --git a/tex/context/base/mkxl/symb-ini.lmt b/tex/context/base/mkxl/symb-ini.lmt new file mode 100644 index 000000000..9837afcf3 --- /dev/null +++ b/tex/context/base/mkxl/symb-ini.lmt @@ -0,0 +1,59 @@ +if not modules then modules = { } end modules ['symb-ini'] = { + version = 1.001, + comment = "companion to symb-ini.mkiv", + author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", + copyright = "PRAGMA ADE / ConTeXt Development Team", + license = "see context related readme files" +} + +local context = context +local variables = interfaces.variables + +fonts = fonts or { } -- brrrr + +local symbols = fonts.symbols or { } +fonts.symbols = symbols + +local listitem = utilities.parsers.listitem +local uselibrary = resolvers.uselibrary + +local report_symbols = logs.reporter ("fonts","symbols") +local status_symbols = logs.messenger("fonts","symbols") + +local patterns = { + "symb-imp-%s.mkxl", + "symb-imp-%s.mkiv", + "symb-imp-%s.tex", + -- obsolete: + "symb-%s.mkiv", + "symb-%s.tex" +} + +local function action(name,foundname) + commands.loadlibrary(name,foundname,false) + status_symbols("library %a loaded",name) +end + +local function failure(name) + report_symbols("library %a is unknown",name) +end + +function symbols.uselibrary(name) + if name ~= variables.reset then + for name in listitem(name) do + uselibrary { + name = name, + patterns = patterns, + action = action, + failure = failure, + onlyonce = true, + } + end + end +end + +interfaces.implement { + name = "usesymbols", + actions = symbols.uselibrary, + arguments = "string", +} diff --git a/tex/context/base/mkxl/symb-ini.mkxl b/tex/context/base/mkxl/symb-ini.mkxl index 513174075..0346e39bb 100644 --- a/tex/context/base/mkxl/symb-ini.mkxl +++ b/tex/context/base/mkxl/symb-ini.mkxl @@ -16,7 +16,7 @@ \writestatus{loading}{ConTeXt Symbol Libraries / Initialization} -\registerctxluafile{symb-ini}{} 
+\registerctxluafile{symb-ini}{autosuffix} \unprotect diff --git a/tex/context/base/mkxl/typo-dir.mkxl b/tex/context/base/mkxl/typo-dir.mkxl index d875ac88f..1d696d8c5 100644 --- a/tex/context/base/mkxl/typo-dir.mkxl +++ b/tex/context/base/mkxl/typo-dir.mkxl @@ -22,7 +22,7 @@ \registerctxluafile{typo-dha}{} %registerctxluafile{typo-dua}{} %registerctxluafile{typo-dub}{} -\registerctxluafile{typo-duc}{} +\registerctxluafile{typo-duc}{autosuffix} \definesystemattribute[directions][public,pickup] diff --git a/tex/context/base/mkxl/typo-duc.lmt b/tex/context/base/mkxl/typo-duc.lmt new file mode 100644 index 000000000..07ae140e8 --- /dev/null +++ b/tex/context/base/mkxl/typo-duc.lmt @@ -0,0 +1,1048 @@ +if not modules then modules = { } end modules ['typo-duc'] = { + version = 1.001, + comment = "companion to typo-dir.mkiv", + author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", + copyright = "PRAGMA ADE / ConTeXt Development Team", + license = "see context related readme files", + comment = "Unicode bidi (sort of) variant c", +} + +-- This is a follow up on typo-uda which itself is a follow up on t-bidi by Khaled Hosny which +-- in turn is based on minibidi.c from Arabeyes. This is a further optimizations, as well as +-- an update on some recent unicode bidi developments. There is (and will) also be more control +-- added. As a consequence this module is somewhat slower than its precursor which itself is +-- slower than the one-pass bidi handler. This is also a playground and I might add some plugin +-- support. However, in the meantime performance got a bit better and this third variant is again +-- some 10% faster than the second variant. + +-- todo (cf html): +-- +-- normal The element does not offer a additional level of embedding with respect to the bidirectional algorithm. For inline elements implicit reordering works across element boundaries. +-- embed If the element is inline, this value opens an additional level of embedding with respect to the bidirectional algorithm. The direction of this embedding level is given by the direction property. +-- bidi-override For inline elements this creates an override. For block container elements this creates an override for inline-level descendants not within another block container element. This means that inside the element, reordering is strictly in sequence according to the direction property; the implicit part of the bidirectional algorithm is ignored. +-- isolate This keyword indicates that the element's container directionality should be calculated without considering the content of this element. The element is therefore isolated from its siblings. When applying its bidirectional-resolution algorithm, its container element treats it as one or several U+FFFC Object Replacement Character, i.e. like an image. +-- isolate-override This keyword applies the isolation behavior of the isolate keyword to the surrounding content and the override behavior o f the bidi-override keyword to the inner content. +-- plaintext This keyword makes the elements directionality calculated without considering its parent bidirectional state or the value of the direction property. The directionality is calculated using the P2 and P3 rules of the Unicode Bidirectional Algorithm. +-- This value allows to display data which has already formatted using a tool following the Unicode Bidirectional Algorithm. 
+-- +-- todo: check for introduced errors +-- todo: reuse list, we have size, so we can just change values (and auto allocate when not there) +-- todo: reuse the stack +-- todo: no need for a max check +-- todo: collapse bound similar ranges (not ok yet) +-- todo: combine some sweeps +-- todo: removing is not needed when we inject at the same spot (only chnage the dir property) +-- todo: isolated runs (isolating runs are similar to bidi=local in the basic analyzer) + +-- todo: check unicode addenda (from the draft): +-- +-- Added support for canonical equivalents in BD16. +-- Changed logic in N0 to not check forwards for context in the case of enclosed text opposite the embedding direction. +-- Major extension of the algorithm to allow for the implementation of directional isolates and the introduction of new isolate-related values to the Bidi_Class property. +-- Adds BD8, BD9, BD10, BD11, BD12, BD13, BD14, BD15, and BD16, Sections 2.4 and 2.5, and Rules X5a, X5b, X5c and X6a. +-- Extensively revises Section 3.3.2, Explicit Levels and Directions and its existing X rules to formalize the algorithm for matching a PDF with the embedding or override initiator whose scope it terminates. +-- Moves Rules X9 and X10 into a separate new Section 3.3.3, Preparations for Implicit Processing. +-- Modifies Rule X10 to make the isolating run sequence the unit to which subsequent rules are applied. +-- Modifies Rule W1 to change an NSM preceded by an isolate initiator or PDI into ON. +-- Adds Rule N0 and makes other changes to Section 3.3.5, Resolving Neutral and Isolate Formatting Types to resolve bracket pairs to the same level. + +local insert, remove, unpack, concat = table.insert, table.remove, table.unpack, table.concat +local utfchar = utf.char +local setmetatable = setmetatable +local formatters = string.formatters + +local directiondata = characters.directions +local mirrordata = characters.mirrors +local textclassdata = characters.textclasses + +local nuts = nodes.nuts + +local getnext = nuts.getnext +local getprev = nuts.getprev +local getid = nuts.getid +local getsubtype = nuts.getsubtype +local getlist = nuts.getlist +----- getchar = nuts.getchar +local getattr = nuts.getattr +local getprop = nuts.getprop +local getdirection = nuts.getdirection +local isnextchar = nuts.isnextchar -- we do this before the font handler + +local setprop = nuts.setprop +local setchar = nuts.setchar +local setdirection = nuts.setdirection +local setattrlist = nuts.setattrlist + +local properties = nodes.properties.data + +local remove_node = nuts.remove +local insert_node_after = nuts.insert_after +local insert_node_before = nuts.insert_before +local start_of_par = nuts.start_of_par + +local nodepool = nuts.pool +local new_direction = nodepool.direction + +local nodecodes = nodes.nodecodes +local gluecodes = nodes.gluecodes + +local glyph_code = nodecodes.glyph +local glue_code = nodecodes.glue +local hlist_code = nodecodes.hlist +local vlist_code = nodecodes.vlist +local math_code = nodecodes.math +local dir_code = nodecodes.dir +local par_code = nodecodes.par +local penalty_code = nodecodes.penalty + +local parfillskip_code = gluecodes.parfillskip +local parfillleftskip_code = gluecodes.parfillleftskip + +local dirvalues = nodes.dirvalues +local lefttoright_code = dirvalues.lefttoright +local righttoleft_code = dirvalues.righttoleft + +local maximum_stack = 0xFF + +local a_directions = attributes.private('directions') + +local directions = typesetters.directions +local setcolor = directions.setcolor +local 
getfences = directions.getfences + +local remove_controls = true directives.register("typesetters.directions.removecontrols",function(v) remove_controls = v end) +----- analyze_fences = true directives.register("typesetters.directions.analyzefences", function(v) analyze_fences = v end) + +local report_directions = logs.reporter("typesetting","directions three") + +local trace_directions = false trackers.register("typesetters.directions", function(v) trace_directions = v end) +local trace_details = false trackers.register("typesetters.directions.details", function(v) trace_details = v end) +local trace_list = false trackers.register("typesetters.directions.list", function(v) trace_list = v end) + +-- strong (old): +-- +-- l : left to right +-- r : right to left +-- lro : left to right override +-- rlo : left to left override +-- lre : left to right embedding +-- rle : left to left embedding +-- al : right to legt arabic (esp punctuation issues) +-- +-- weak: +-- +-- en : english number +-- es : english number separator +-- et : english number terminator +-- an : arabic number +-- cs : common number separator +-- nsm : nonspacing mark +-- bn : boundary neutral +-- +-- neutral: +-- +-- b : paragraph separator +-- s : segment separator +-- ws : whitespace +-- on : other neutrals +-- +-- interesting: this is indeed better (and more what we expect i.e. we already use this split +-- in the old original (also these isolates) +-- +-- strong (new): +-- +-- l : left to right +-- r : right to left +-- al : right to left arabic (esp punctuation issues) +-- +-- explicit: (new) +-- +-- lro : left to right override +-- rlo : left to left override +-- lre : left to right embedding +-- rle : left to left embedding +-- pdf : pop dir format +-- lri : left to right isolate +-- rli : left to left isolate +-- fsi : first string isolate +-- pdi : pop directional isolate + +local whitespace = { + lre = true, + rle = true, + lro = true, + rlo = true, + pdf = true, + bn = true, + ws = true, +} + +local b_s_ws_on = { + b = true, + s = true, + ws = true, + on = true +} + +-- tracing + +local function show_list(list,size,what) + local what = what or "direction" + local joiner = utfchar(0x200C) + local result = { } + for i=1,size do + local entry = list[i] + local character = entry.char + local direction = entry[what] + if character == 0xFFFC then + local first = entry.id + local last = entry.last + local skip = entry.skip + if last then + result[i] = formatters["%-3s:%s %s..%s (%i)"](direction,joiner,nodecodes[first],nodecodes[last],skip or 0) + else + result[i] = formatters["%-3s:%s %s (%i)"](direction,joiner,nodecodes[first],skip or 0) + end + elseif character >= 0x202A and character <= 0x202C then + result[i] = formatters["%-3s:%s %U"](direction,joiner,character) + else + result[i] = formatters["%-3s:%s %c %U"](direction,joiner,character,character) + end + end + return concat(result,joiner .. " | " .. 
joiner) +end + +-- preparation + +local function show_done(list,size) + local joiner = utfchar(0x200C) + local result = { } + local format = formatters["<%s>"] + for i=1,size do + local entry = list[i] + local character = entry.char + local begindir = entry.begindir + local enddir = entry.enddir + if begindir then + result[#result+1] = format(begindir) + end + if entry.remove then + -- continue + elseif character == 0xFFFC then + result[#result+1] = format("?") + elseif character == 0x0020 then + result[#result+1] = format(" ") + elseif character >= 0x202A and character <= 0x202C then + result[#result+1] = format(entry.original) + else + result[#result+1] = utfchar(character) + end + if enddir then + result[#result+1] = format(enddir) + end + end + return concat(result,joiner) +end + +-- keeping the list and overwriting doesn't save much runtime, only a few percent +-- char is only used for mirror, so in fact we can as well only store it for +-- glyphs only +-- +-- tracking what direction is used and skipping tests is not faster (extra kind of +-- compensates gain) + +local mt_space = { __index = { char = 0x0020, direction = "ws", original = "ws", level = 0, skip = 0 } } +local mt_lre = { __index = { char = 0x202A, direction = "lre", original = "lre", level = 0, skip = 0 } } +local mt_rle = { __index = { char = 0x202B, direction = "rle", original = "rle", level = 0, skip = 0 } } +local mt_pdf = { __index = { char = 0x202C, direction = "pdf", original = "pdf", level = 0, skip = 0 } } +local mt_object = { __index = { char = 0xFFFC, direction = "on", original = "on", level = 0, skip = 0 } } + +local stack = table.setmetatableindex("table") -- shared +local list = { } -- shared + +-- instead of skip we can just have slots filled with 'skip' + +local function build_list(head,where) + -- P1 + local current = head + local size = 0 + while current do + size = size + 1 + -- local id = getid(current) + local nxt, chr, id = isnextchar(current) + local p = properties[current] + if p and p.directions then + -- tricky as dirs can be injected in between + local skip = 0 + local last = id + -- current = getnext(current) + current = nxt + while current do + local id = getid(current) + local p = properties[current] + if p and p.directions then + skip = skip + 1 + last = id + current = getnext(current) + else + break + end + end + if id == last then -- the start id + list[size] = setmetatable({ skip = skip, id = id },mt_object) + else + list[size] = setmetatable({ skip = skip, id = id, last = last },mt_object) + end + -- elseif id == glyph_code then + elseif chr then + -- local chr = getchar(current) + local dir = directiondata[chr] + -- could also be a metatable + list[size] = { char = chr, direction = dir, original = dir, level = 0 } + -- current = getnext(current) + current = nxt + -- if not list[dir] then list[dir] = true end -- not faster when we check for usage + elseif id == glue_code then -- and how about kern + list[size] = setmetatable({ },mt_space) + -- current = getnext(current) + current = nxt + elseif id == dir_code then + local dir, pop = getdirection(current) + if dir == lefttoright_code then + list[size] = setmetatable({ },pop and mt_pdf or mt_lre) + elseif dir == righttoleft_code then + list[size] = setmetatable({ },pop and mt_pdf or mt_rle) + else + list[size] = setmetatable({ id = id },mt_object) + end + -- current = getnext(current) + current = nxt + elseif id == math_code then + local skip = 0 + -- current = getnext(current) + current = nxt + while getid(current) ~= math_code do + skip 
= skip + 1 + current = getnext(current) + end + skip = skip + 1 + current = getnext(current) + list[size] = setmetatable({ id = id, skip = skip },mt_object) + else -- disc_code: we assume that these are the same as the surrounding + local skip = 0 + local last = id + -- current = getnext(current) + current = nxt + while n do + local id = getid(current) + if id ~= glyph_code and id ~= glue_code and id ~= dir_code then + skip = skip + 1 + last = id + current = getnext(current) + else + break + end + end + if id == last then -- the start id + list[size] = setmetatable({ id = id, skip = skip },mt_object) + else + list[size] = setmetatable({ id = id, skip = skip, last = last },mt_object) + end + end + end + return list, size +end + +-- new + +-- we could support ( ] and [ ) and such ... + +-- ש ) ל ( א 0-0 +-- ש ( ל ] א 0-0 +-- ש ( ל ) א 2-4 +-- ש ( ל [ א ) כ ] 2-6 +-- ש ( ל ] א ) כ 2-6 +-- ש ( ל ) א ) כ 2-4 +-- ש ( ל ( א ) כ 4-6 +-- ש ( ל ( א ) כ ) 2-8,4-6 +-- ש ( ל [ א ] כ ) 2-8,4-6 + +local fencestack = table.setmetatableindex("table") + +local function resolve_fences(list,size,start,limit) + -- N0: funny effects, not always better, so it's an option + local nofstack = 0 + for i=start,limit do + local entry = list[i] + if entry.direction == "on" then + local char = entry.char + local mirror = mirrordata[char] + if mirror then + local class = textclassdata[char] + entry.mirror = mirror + entry.class = class + if class == "open" then + nofstack = nofstack + 1 + local stacktop = fencestack[nofstack] + stacktop[1] = mirror + stacktop[2] = i + elseif nofstack == 0 then + -- skip + elseif class == "close" then + while nofstack > 0 do + local stacktop = fencestack[nofstack] + if stacktop[1] == char then + local open = stacktop[2] + local close = i + list[open ].paired = close + list[close].paired = open + break + else + -- do we mirror or not + end + nofstack = nofstack - 1 + end + end + end + end + end +end + +-- local function test_fences(str) +-- local list = { } +-- for s in string.gmatch(str,".") do +-- local b = utf.byte(s) +-- list[#list+1] = { c = s, char = b, direction = directiondata[b] } +-- end +-- resolve_fences(list,#list,1,#size) +-- inspect(list) +-- end +-- +-- test_fences("a(b)c(d)e(f(g)h)i") +-- test_fences("a(b[c)d]") + +-- the action + +local function get_baselevel(head,list,size,direction) + if direction == lefttoright_code or direction == righttoleft_code then + return direction, true + elseif getid(head) == par_code and start_of_par(head) then + direction = getdirection(head) + if direction == lefttoright_code or direction == righttoleft_code then + return direction, true + end + end + -- P2, P3 + for i=1,size do + local entry = list[i] + local direction = entry.direction + if direction == "r" or direction == "al" then -- and an ? 
+ return righttoleft_code, true + elseif direction == "l" then + return lefttoright_code, true + end + end + return lefttoright_code, false +end + +local function resolve_explicit(list,size,baselevel) +-- if list.rle or list.lre or list.rlo or list.lro then + -- X1 + local level = baselevel + local override = "on" + local nofstack = 0 + for i=1,size do + local entry = list[i] + local direction = entry.direction + -- X2 + if direction == "rle" then + if nofstack < maximum_stack then + nofstack = nofstack + 1 + local stacktop = stack[nofstack] + stacktop[1] = level + stacktop[2] = override + level = level + (level % 2 == 1 and 2 or 1) -- least_greater_odd(level) + override = "on" + entry.level = level + entry.direction = "bn" + entry.remove = true + elseif trace_directions then + report_directions("stack overflow at position %a with direction %a",i,direction) + end + -- X3 + elseif direction == "lre" then + if nofstack < maximum_stack then + nofstack = nofstack + 1 + local stacktop = stack[nofstack] + stacktop[1] = level + stacktop[2] = override + level = level + (level % 2 == 1 and 1 or 2) -- least_greater_even(level) + override = "on" + entry.level = level + entry.direction = "bn" + entry.remove = true + elseif trace_directions then + report_directions("stack overflow at position %a with direction %a",i,direction) + end + -- X4 + elseif direction == "rlo" then + if nofstack < maximum_stack then + nofstack = nofstack + 1 + local stacktop = stack[nofstack] + stacktop[1] = level + stacktop[2] = override + level = level + (level % 2 == 1 and 2 or 1) -- least_greater_odd(level) + override = "r" + entry.level = level + entry.direction = "bn" + entry.remove = true + elseif trace_directions then + report_directions("stack overflow at position %a with direction %a",i,direction) + end + -- X5 + elseif direction == "lro" then + if nofstack < maximum_stack then + nofstack = nofstack + 1 + local stacktop = stack[nofstack] + stacktop[1] = level + stacktop[2] = override + level = level + (level % 2 == 1 and 1 or 2) -- least_greater_even(level) + override = "l" + entry.level = level + entry.direction = "bn" + entry.remove = true + elseif trace_directions then + report_directions("stack overflow at position %a with direction %a",i,direction) + end + -- X7 + elseif direction == "pdf" then + if nofstack > 0 then + local stacktop = stack[nofstack] + level = stacktop[1] + override = stacktop[2] + nofstack = nofstack - 1 + entry.level = level + entry.direction = "bn" + entry.remove = true + elseif trace_directions then + report_directions("stack underflow at position %a with direction %a", + i, direction) + else + report_directions("stack underflow at position %a with direction %a: %s", + i, direction, show_list(list,size)) + end + -- X6 + else + entry.level = level + if override ~= "on" then + entry.direction = override + end + end + end + -- X8 (reset states and overrides after paragraph) +end + +local function resolve_weak(list,size,start,limit,orderbefore,orderafter) + -- W1: non spacing marks get the direction of the previous character +-- if list.nsm then + for i=start,limit do + local entry = list[i] + if entry.direction == "nsm" then + if i == start then + entry.direction = orderbefore + else + entry.direction = list[i-1].direction + end + end + end +-- end + -- W2: mess with numbers and arabic +-- if list.en then + for i=start,limit do + local entry = list[i] + if entry.direction == "en" then + for j=i-1,start,-1 do + local prev = list[j] + local direction = prev.direction + if direction == "al" then + 
entry.direction = "an" + break + elseif direction == "r" or direction == "l" then + break + end + end + end + end +-- end + -- W3 +-- if list.al then + for i=start,limit do + local entry = list[i] + if entry.direction == "al" then + entry.direction = "r" + end + end +-- end + -- W4: make separators number +-- if list.es or list.cs then + -- skip + if false then + for i=start+1,limit-1 do + local entry = list[i] + local direction = entry.direction + if direction == "es" then + if list[i-1].direction == "en" and list[i+1].direction == "en" then + entry.direction = "en" + end + elseif direction == "cs" then + local prevdirection = list[i-1].direction + if prevdirection == "en" then + if list[i+1].direction == "en" then + entry.direction = "en" + end + elseif prevdirection == "an" and list[i+1].direction == "an" then + entry.direction = "an" + end + end + end + else -- only more efficient when we have es/cs + local runner = start + 2 + if runner <= limit then + local before = list[start] + local entry = list[start + 1] + local after = list[runner] + while after do + local direction = entry.direction + if direction == "es" then + if before.direction == "en" and after.direction == "en" then + entry.direction = "en" + end + elseif direction == "cs" then + local prevdirection = before.direction + if prevdirection == "en" then + if after.direction == "en" then + entry.direction = "en" + end + elseif prevdirection == "an" and after.direction == "an" then + entry.direction = "an" + end + end + before = current + current = after + after = list[runner] + runner = runner + 1 + end + end + end +-- end + -- W5 +-- if list.et then + local i = start + while i <= limit do + if list[i].direction == "et" then + local runstart = i + local runlimit = runstart + for i=runstart,limit do + if list[i].direction == "et" then + runlimit = i + else + break + end + end + local rundirection = runstart == start and sor or list[runstart-1].direction + if rundirection ~= "en" then + rundirection = runlimit == limit and orderafter or list[runlimit+1].direction + end + if rundirection == "en" then + for j=runstart,runlimit do + list[j].direction = "en" + end + end + i = runlimit + end + i = i + 1 + end +-- end + -- W6 +-- if list.es or list.cs or list.et then + for i=start,limit do + local entry = list[i] + local direction = entry.direction + if direction == "es" or direction == "et" or direction == "cs" then + entry.direction = "on" + end + end +-- end + -- W7 + for i=start,limit do + local entry = list[i] + if entry.direction == "en" then + local prev_strong = orderbefore + for j=i-1,start,-1 do + local direction = list[j].direction + if direction == "l" or direction == "r" then + prev_strong = direction + break + end + end + if prev_strong == "l" then + entry.direction = "l" + end + end + end +end + +local function resolve_neutral(list,size,start,limit,orderbefore,orderafter) + -- N1, N2 + for i=start,limit do + local entry = list[i] + if b_s_ws_on[entry.direction] then + -- this needs checking + local leading_direction, trailing_direction, resolved_direction + local runstart = i + local runlimit = runstart +-- for j=runstart,limit do + for j=runstart+1,limit do + if b_s_ws_on[list[j].direction] then +-- runstart = j + runlimit = j + else + break + end + end + if runstart == start then + leading_direction = orderbefore + else + leading_direction = list[runstart-1].direction + if leading_direction == "en" or leading_direction == "an" then + leading_direction = "r" + end + end + if runlimit == limit then + trailing_direction 
= orderafter + else + trailing_direction = list[runlimit+1].direction + if trailing_direction == "en" or trailing_direction == "an" then + trailing_direction = "r" + end + end + if leading_direction == trailing_direction then + -- N1 + resolved_direction = leading_direction + else + -- N2 / does the weird period + resolved_direction = entry.level % 2 == 1 and "r" or "l" + end + for j=runstart,runlimit do + list[j].direction = resolved_direction + end + i = runlimit + end + i = i + 1 + end +end + +local function resolve_implicit(list,size,start,limit,orderbefore,orderafter,baselevel) + for i=start,limit do + local entry = list[i] + local level = entry.level + local direction = entry.direction + if level % 2 ~= 1 then -- even + -- I1 + if direction == "r" then + entry.level = level + 1 + elseif direction == "an" or direction == "en" then + entry.level = level + 2 + end + else + -- I2 + if direction == "l" or direction == "en" or direction == "an" then + entry.level = level + 1 + end + end + end +end + +local function resolve_levels(list,size,baselevel,analyze_fences) + -- X10 + local start = 1 + while start < size do + local level = list[start].level + local limit = start + 1 + while limit < size and list[limit].level == level do + limit = limit + 1 + end + local prev_level = start == 1 and baselevel or list[start-1].level + local next_level = limit == size and baselevel or list[limit+1].level + local orderbefore = (level > prev_level and level or prev_level) % 2 == 1 and "r" or "l" + local orderafter = (level > next_level and level or next_level) % 2 == 1 and "r" or "l" + -- W1 .. W7 + resolve_weak(list,size,start,limit,orderbefore,orderafter) + -- N0 + if analyze_fences then + resolve_fences(list,size,start,limit) + end + -- N1 .. N2 + resolve_neutral(list,size,start,limit,orderbefore,orderafter) + -- I1 .. I2 + resolve_implicit(list,size,start,limit,orderbefore,orderafter,baselevel) + start = limit + end + -- L1 + for i=1,size do + local entry = list[i] + local direction = entry.original + -- (1) + if direction == "s" or direction == "b" then + entry.level = baselevel + -- (2) + for j=i-1,1,-1 do + local entry = list[j] + if whitespace[entry.original] then + entry.level = baselevel + else + break + end + end + end + end + -- (3) + for i=size,1,-1 do + local entry = list[i] + if whitespace[entry.original] then + entry.level = baselevel + else + break + end + end + -- L4 + if analyze_fences then + for i=1,size do + local entry = list[i] + if entry.level % 2 == 1 then -- odd(entry.level) + if entry.mirror and not entry.paired then + entry.mirror = false + end + -- okay + elseif entry.mirror then + entry.mirror = false + end + end + else + for i=1,size do + local entry = list[i] + if entry.level % 2 == 1 then -- odd(entry.level) + local mirror = mirrordata[entry.char] + if mirror then + entry.mirror = mirror + end + end + end + end +end + +local stack = { } + +local function insert_dir_points(list,size) + -- L2, but no actual reversal is done, we simply annotate where + -- begindir/enddir nodes will be inserted.
+ local maxlevel = 0 + local toggle = true + for i=1,size do + local level = list[i].level + if level > maxlevel then + maxlevel = level + end + end + for level=0,maxlevel do + local started -- = false + local begindir -- = nil + local enddir -- = nil + local prev -- = nil + if toggle then + begindir = lefttoright_code + enddir = lefttoright_code + toggle = false + else + begindir = righttoleft_code + enddir = righttoleft_code + toggle = true + end + for i=1,size do + local entry = list[i] + if entry.level >= level then + if not started then + entry.begindir = begindir + started = true + end + else + if started then + prev.enddir = enddir + started = false + end + end + prev = entry + end + end + -- make sure to close the run at end of line + local last = list[size] + if not last.enddir then + local n = 0 + for i=1,size do + local entry = list[i] + local e = entry.enddir + local b = entry.begindir + if e then + n = n - 1 + end + if b then + n = n + 1 + stack[n] = b + end + end + if n > 0 then + if trace_list and n > 1 then + report_directions("unbalanced list") + end + last.enddir = stack[n] + end + end +end + +-- We flag nodes that can be skipped when we see them again but because whatever +-- mechanism can inject dir nodes that then are not flagged, we don't flag dir +-- nodes that we inject here. + +local function apply_to_list(list,size,head,pardir) + local index = 1 + local current = head + if trace_list then + report_directions("start run") + end + while current do + if index > size then + report_directions("fatal error, size mismatch") + break + end + local id = getid(current) -- we can better store the id in list[index] + local entry = list[index] + local begindir = entry.begindir + local enddir = entry.enddir + local p = properties[current] + if p then + p.directions = true + else + properties[current] = { directions = true } + end + if id == glyph_code then + local mirror = entry.mirror + if mirror then + setchar(current,mirror) + end + if trace_directions then + local direction = entry.direction + if trace_list then + local original = entry.original + local char = entry.char + local level = entry.level + if direction == original then + report_directions("%2i : %C : %s",level,char,direction) + else + report_directions("%2i : %C : %s -> %s",level,char,original,direction) + end + end + setcolor(current,direction,false,mirror) + end + elseif id == hlist_code or id == vlist_code then + setdirection(current,pardir) -- is this really needed? + elseif id == glue_code then + -- Maybe I should also fix dua and dub but on the other hand ... why? 
+ if enddir and getsubtype(current) == parfillskip_code then + -- insert the last enddir before \parfillskip glue + local c = current + local p = getprev(c) + if p and getid(p) == glue_code and getsubtype(p) == parfillleftskip_code then + c = p + p = getprev(c) + end + if p and getid(p) == penalty_code then -- linepenalty + c = p + end + -- there is always a par nodes so head will stay + head = insert_node_before(head,c,new_direction(enddir,true)) + enddir = false + end + elseif begindir then + if id == par_code and start_of_par(current) then + -- par should always be the 1st node + head, current = insert_node_after(head,current,new_direction(begindir)) + begindir = nil + end + end + if begindir then + head = insert_node_before(head,current,new_direction(begindir)) + end + local skip = entry.skip + if skip and skip > 0 then + for i=1,skip do + current = getnext(current) + local p = properties[current] + if p then + p.directions = true + else + properties[current] = { directions = true } + end + end + end + if enddir then + head, current = insert_node_after(head,current,new_direction(enddir,true)) + end + if not entry.remove then + current = getnext(current) + elseif remove_controls then + -- X9 + head, current = remove_node(head,current,true) + else + current = getnext(current) + end + index = index + 1 + end + if trace_list then + report_directions("stop run") + end + return head +end + +-- If needed we can optimize for only_one. There is no need to do anything +-- when it's not a glyph. Otherwise we only need to check mirror and apply +-- directions when it's different from the surrounding. Paragraphs always +-- have more than one node. Actually, we only enter this function when we +-- do have a glyph! + +local function process(head,direction,only_one,where) + -- for the moment a whole paragraph property + local attr = getattr(head,a_directions) + local analyze_fences = getfences(attr) + -- + local list, size = build_list(head,where) + local baselevel, dirfound = get_baselevel(head,list,size,direction) + if trace_details then + report_directions("analyze: baselevel %a",baselevel == righttoleft_code and "r2l" or "l2r") + report_directions("before : %s",show_list(list,size,"original")) + end + resolve_explicit(list,size,baselevel) + resolve_levels(list,size,baselevel,analyze_fences) + insert_dir_points(list,size) + if trace_details then + report_directions("after : %s",show_list(list,size,"direction")) + report_directions("result : %s",show_done(list,size)) + end + return apply_to_list(list,size,head,baselevel) +end + +local variables = interfaces.variables + +directions.installhandler(variables.one, process) -- for old times sake +directions.installhandler(variables.two, process) -- for old times sake +directions.installhandler(variables.three, process) -- for old times sake +directions.installhandler(variables.unicode,process) diff --git a/tex/generic/context/luatex/luatex-fonts-merged.lua b/tex/generic/context/luatex/luatex-fonts-merged.lua index 2f4c0175e..b40f2c8f2 100644 --- a/tex/generic/context/luatex/luatex-fonts-merged.lua +++ b/tex/generic/context/luatex/luatex-fonts-merged.lua @@ -1,6 +1,6 @@ -- merged file : c:/data/develop/context/sources/luatex-fonts-merged.lua -- parent file : c:/data/develop/context/sources/luatex-fonts.lua --- merge date : 2020-12-09 10:48 +-- merge date : 2020-12-10 22:23 do -- begin closure to overcome local limits and interference |
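
The X2..X5 branches of resolve_explicit in the directions code above inline the UAX #9 "least greater odd/even embedding level" rule as modular arithmetic. The following is a minimal standalone sketch of just that arithmetic in plain Lua; the helper names are illustrative only and do not occur in the patched file, which keeps the expressions inline:

-- sketch of the embedding level arithmetic used in resolve_explicit
local function least_greater_odd(level)
    return level + (level % 2 == 1 and 2 or 1) -- used for rle and rlo
end

local function least_greater_even(level)
    return level + (level % 2 == 1 and 1 or 2) -- used for lre and lro
end

print(least_greater_odd(0), least_greater_even(0)) -- 1  2
print(least_greater_odd(1), least_greater_even(1)) -- 3  2
print(least_greater_odd(2), least_greater_even(2)) -- 3  4

Since odd levels render right-to-left and even levels left-to-right, raising to the least greater odd (or even) level is what makes an rle/rlo (or lre/lro) embedding nest one level deeper in the intended direction.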