This is a prelude to a more extensive logging module. We no longer
provide
A conversion function that takes a number, unit (string) and optional format (string) is implemented using this table.
--ldx]]-- +local f_none = formatters["%s%s"] +local f_true = formatters["%0.5F%s"] -local function numbertodimen(n,unit,fmt) +local function numbertodimen(n,unit,fmt) -- will be redefined later ! if type(n) == 'string' then return n else unit = unit or 'pt' + n = n * dimenfactors[unit] if not fmt then - fmt = "%s%s" + fmt = f_none(n,unit) elseif fmt == true then - fmt = "%0.5f%s" + fmt = f_true(n,unit) + else + return formatters[fmt](n,unit) end - return format(fmt,n*dimenfactors[unit],unit) - -- if fmt then - -- return format(fmt,n*dimenfactors[unit],unit) - -- else - -- return match(format("%.20f",n*dimenfactors[unit]),"(.-0?)0*$") .. unit - -- end end end diff --git a/tex/context/base/util-env.lua b/tex/context/base/util-env.lua index 0a708ea43..b72226900 100644 --- a/tex/context/base/util-env.lua +++ b/tex/context/base/util-env.lua @@ -9,11 +9,11 @@ if not modules then modules = { } end modules ['util-env'] = { local allocate, mark = utilities.storage.allocate, utilities.storage.mark local format, sub, match, gsub, find = string.format, string.sub, string.match, string.gsub, string.find -local unquoted, quoted = string.unquoted, string.quoted +local unquoted, quoted, optionalquoted = string.unquoted, string.quoted, string.optionalquoted local concat, insert, remove = table.concat, table.insert, table.remove -environment = environment or { } -local environment = environment +environment = environment or { } +local environment = environment -- precautions @@ -182,26 +182,14 @@ function environment.splitarguments(separator) -- rather special, cut-off before end function environment.reconstructcommandline(arg,noquote) + local resolveprefix = resolvers.resolve -- something rather special arg = arg or environment.originalarguments if noquote and #arg == 1 then - -- we could just do: return unquoted(resolvers.resolve(arg[i])) - local a = arg[1] - a = resolvers.resolve(a) - a = unquoted(a) - return a + return unquoted(resolveprefix and resolveprefix(arg[1]) or arg[1]) 
elseif #arg > 0 then local result = { } for i=1,#arg do - -- we could just do: result[#result+1] = format("%q",unquoted(resolvers.resolve(arg[i]))) - local a = arg[i] - a = resolvers.resolve(a) - a = unquoted(a) - a = gsub(a,'"','\\"') -- tricky - if find(a," ") then - result[#result+1] = quoted(a) - else - result[#result+1] = a - end + result[i] = optionalquoted(resolveprefix and resolveprefix(arg[i]) or resolveprefix) end return concat(result," ") else @@ -238,26 +226,10 @@ end -- print(environment.relativepath("//x")) -- //x -- print(environment.relativepath()) -- e:/tmp --- -- to be tested: --- --- function environment.reconstructcommandline(arg,noquote) --- arg = arg or environment.originalarguments --- if noquote and #arg == 1 then --- return unquoted(resolvers.resolve(arg[1])) --- elseif #arg > 0 then --- local result = { } --- for i=1,#arg do --- result[#result+1] = format("%q",unquoted(resolvers.resolve(arg[i]))) -- always quote --- end --- return concat(result," ") --- else --- return "" --- end --- end - if arg then -- new, reconstruct quoted snippets (maybe better just remove the " then and add them later) + local newarg, instring = { }, false for index=1,#arg do diff --git a/tex/context/base/util-prs.lua b/tex/context/base/util-prs.lua index e5b35a727..a3c1c6f8f 100644 --- a/tex/context/base/util-prs.lua +++ b/tex/context/base/util-prs.lua @@ -21,6 +21,8 @@ parsers.patterns = patterns local setmetatableindex = table.setmetatableindex local sortedhash = table.sortedhash +local sortedkeys = table.sortedkeys +local tohash = table.tohash -- we share some patterns @@ -94,9 +96,7 @@ patterns.settings_to_hash_b = pattern_b_s patterns.settings_to_hash_c = pattern_c_s function parsers.make_settings_to_hash_pattern(set,how) - if type(str) == "table" then - return set - elseif how == "strict" then + if how == "strict" then return (pattern_c/set)^1 elseif how == "tolerant" then return (pattern_b/set)^1 @@ -106,7 +106,9 @@ function 
parsers.make_settings_to_hash_pattern(set,how) end function parsers.settings_to_hash(str,existing) - if type(str) == "table" then + if not str or str == "" then + return { } + elseif type(str) == "table" then if existing then for k, v in next, str do existing[k] = v @@ -115,17 +117,17 @@ function parsers.settings_to_hash(str,existing) else return str end - elseif str and str ~= "" then + else hash = existing or { } lpegmatch(pattern_a_s,str) return hash - else - return { } end end function parsers.settings_to_hash_tolerant(str,existing) - if type(str) == "table" then + if not str or str == "" then + return { } + elseif type(str) == "table" then if existing then for k, v in next, str do existing[k] = v @@ -134,17 +136,17 @@ function parsers.settings_to_hash_tolerant(str,existing) else return str end - elseif str and str ~= "" then + else hash = existing or { } lpegmatch(pattern_b_s,str) return hash - else - return { } end end function parsers.settings_to_hash_strict(str,existing) - if type(str) == "table" then + if not str or str == "" then + return nil + elseif type(str) == "table" then if existing then for k, v in next, str do existing[k] = v @@ -157,8 +159,6 @@ function parsers.settings_to_hash_strict(str,existing) hash = existing or { } lpegmatch(pattern_c_s,str) return next(hash) and hash - else - return nil end end @@ -167,24 +167,24 @@ local value = P(lbrace * C((nobrace + nestedbraces)^0) * rbrace) + C((nestedbraces + (1-comma))^0) local pattern = spaces * Ct(value*(separator*value)^0) --- "aap, {noot}, mies" : outer {} removes, leading spaces ignored +-- "aap, {noot}, mies" : outer {} removed, leading spaces ignored patterns.settings_to_array = pattern -- we could use a weak table as cache function parsers.settings_to_array(str,strict) - if type(str) == "table" then - return str - elseif not str or str == "" then + if not str or str == "" then return { } + elseif type(str) == "table" then + return str elseif strict then - if find(str,"{") then + if 
find(str,"{",1,true) then return lpegmatch(pattern,str) else return { str } end - elseif find(str,",") then + elseif find(str,",",1,true) then return lpegmatch(pattern,str) else return { str } @@ -195,12 +195,40 @@ end -- -- "{123} , 456 " -> "123" "456" -local separator = space^0 * comma * space^0 -local value = P(lbrace * C((nobrace + nestedbraces)^0) * rbrace) - + C((nestedbraces + (1-(space^0*(comma+P(-1)))))^0) -local withvalue = Carg(1) * value / function(f,s) return f(s) end -local pattern_a = spaces * Ct(value*(separator*value)^0) -local pattern_b = spaces * withvalue * (separator*withvalue)^0 +-- local separator = space^0 * comma * space^0 +-- local value = P(lbrace * C((nobrace + nestedbraces)^0) * rbrace) +-- + C((nestedbraces + (1-(space^0*(comma+P(-1)))))^0) +-- local withvalue = Carg(1) * value / function(f,s) return f(s) end +-- local pattern_a = spaces * Ct(value*(separator*value)^0) +-- local pattern_b = spaces * withvalue * (separator*withvalue)^0 + +local cache_a = { } +local cache_b = { } + +function parsers.groupedsplitat(symbol,withaction) + if not symbol then + symbol = "," + end + local pattern = (withaction and cache_b or cache_a)[symbol] + if not pattern then + local symbols = S(symbol) + local separator = space^0 * symbols * space^0 + local value = P(lbrace * C((nobrace + nestedbraces)^0) * rbrace) + + C((nestedbraces + (1-(space^0*(symbols+P(-1)))))^0) + if withaction then + local withvalue = Carg(1) * value / function(f,s) return f(s) end + pattern = spaces * withvalue * (separator*withvalue)^0 + cache_b[symbol] = pattern + else + pattern = spaces * Ct(value*(separator*value)^0) + cache_a[symbol] = pattern + end + end + return pattern +end + +local pattern_a = parsers.groupedsplitat(",",false) +local pattern_b = parsers.groupedsplitat(",",true) function parsers.stripped_settings_to_array(str) if not str or str == "" then @@ -221,8 +249,6 @@ end -- parsers.process_stripped_settings("{123} , 456 ",function(s) print("["..s.."]") end) -- 
parsers.process_stripped_settings("123 , 456 ",function(s) print("["..s.."]") end) --- - local function set(t,v) t[#t+1] = v end @@ -236,8 +262,8 @@ end function parsers.hash_to_string(h,separator,yes,no,strict,omit) if h then - local t, tn, s = { }, 0, table.sortedkeys(h) - omit = omit and table.tohash(omit) + local t, tn, s = { }, 0, sortedkeys(h) + omit = omit and tohash(omit) for i=1,#s do local key = s[i] if not omit or not omit[key] then @@ -275,15 +301,25 @@ function parsers.array_to_string(a,separator) end end -function parsers.settings_to_set(str,t) -- tohash? -- todo: lpeg -- duplicate anyway - t = t or { } --- for s in gmatch(str,"%s*([^, ]+)") do -- space added - for s in gmatch(str,"[^, ]+") do -- space added - t[s] = true - end - return t +-- function parsers.settings_to_set(str,t) -- tohash? -- todo: lpeg -- duplicate anyway +-- if str then +-- t = t or { } +-- for s in gmatch(str,"[^, ]+") do -- space added +-- t[s] = true +-- end +-- return t +-- else +-- return { } +-- end +-- end + +local pattern = Cf(Ct("") * Cg(C((1-S(", "))^1) * S(", ")^0 * Cc(true))^1,rawset) + +function utilities.parsers.settings_to_set(str,t) + return str and lpegmatch(pattern,str) or { } end + function parsers.simple_hash_to_string(h, separator) local t, tn = { }, 0 for k, v in sortedhash(h) do @@ -297,7 +333,7 @@ end -- for mtx-context etc: aaaa bbbb cccc=dddd eeee=ffff -local str = C((1-whitespace-equal)^1) +local str = Cs(lpegpatterns.unquoted) + C((1-whitespace-equal)^1) local setting = Cf( Carg(1) * (whitespace^0 * Cg(str * whitespace^0 * (equal * whitespace^0 * str + Cc(""))))^1,rawset) local splitter = setting^1 @@ -305,6 +341,12 @@ function utilities.parsers.options_to_hash(str,target) return str and lpegmatch(splitter,str,1,target or { }) or { } end +local splitter = lpeg.tsplitat(" ") + +function utilities.parsers.options_to_array(str) + return str and lpegmatch(splitter,str) or { } +end + -- for chem (currently one level) local value = P(lbrace * C((nobrace + 
nestedbraces)^0) * rbrace) @@ -436,7 +478,7 @@ local defaultspecification = { separator = ",", quote = '"' } -- database module function parsers.csvsplitter(specification) - specification = specification and table.setmetatableindex(specification,defaultspecification) or defaultspecification + specification = specification and setmetatableindex(specification,defaultspecification) or defaultspecification local separator = specification.separator local quotechar = specification.quote local separator = S(separator ~= "" and separator or ",") @@ -475,7 +517,7 @@ end -- local list, names = mycsvsplitter(crap) inspect(list) inspect(names) function parsers.rfc4180splitter(specification) - specification = specification and table.setmetatableindex(specification,defaultspecification) or defaultspecification + specification = specification and setmetatableindex(specification,defaultspecification) or defaultspecification local separator = specification.separator --> rfc: COMMA local quotechar = P(specification.quote) --> DQUOTE local dquotechar = quotechar * quotechar --> 2DQUOTE @@ -488,7 +530,7 @@ function parsers.rfc4180splitter(specification) local field = escaped + non_escaped + Cc("") local record = Ct(field * (separator * field)^1) local headerline = record * Cp() - local wholeblob = Ct((newline^-1 * record)^0) + local wholeblob = Ct((newline^(specification.strict and -1 or 1) * record)^0) return function(data,getheader) if getheader then local header, position = lpegmatch(headerline,data) @@ -542,8 +584,8 @@ end -- -local pattern_math = Cs((P("%")/"\\percent " + P("^") * Cc("{") * lpegpatterns.integer * Cc("}") + P(1))^0) -local pattern_text = Cs((P("%")/"\\percent " + (P("^")/"\\high") * Cc("{") * lpegpatterns.integer * Cc("}") + P(1))^0) +local pattern_math = Cs((P("%")/"\\percent " + P("^") * Cc("{") * lpegpatterns.integer * Cc("}") + anything)^0) +local pattern_text = Cs((P("%")/"\\percent " + (P("^")/"\\high") * Cc("{") * lpegpatterns.integer * Cc("}") + 
anything)^0) patterns.unittotex = pattern @@ -551,7 +593,7 @@ function parsers.unittotex(str,textmode) return lpegmatch(textmode and pattern_text or pattern_math,str) end -local pattern = Cs((P("^") / "" * lpegpatterns.integer * Cc("") + P(1))^0) +local pattern = Cs((P("^") / "" * lpegpatterns.integer * Cc("") + anything)^0) function parsers.unittoxml(str) return lpegmatch(pattern,str) @@ -560,10 +602,10 @@ end -- print(utilities.parsers.unittotex("10^-32 %"),utilities.parsers.unittoxml("10^32 %")) local cache = { } -local spaces = lpeg.patterns.space^0 +local spaces = lpegpatterns.space^0 local dummy = function() end -table.setmetatableindex(cache,function(t,k) +setmetatableindex(cache,function(t,k) local separator = P(k) local value = (1-separator)^0 local pattern = spaces * C(value) * separator^0 * Cp() @@ -648,3 +690,27 @@ function utilities.parsers.runtime(time) local seconds = mod(time,60) return days, hours, minutes, seconds end + +-- + +local spacing = whitespace^0 +local apply = P("->") +local method = C((1-apply)^1) +local token = lbrace * C((1-rbrace)^1) * rbrace + C(anything^1) + +local pattern = spacing * (method * spacing * apply + Carg(1)) * spacing * token + +function utilities.parsers.splitmethod(str,default) + if str then + return lpegmatch(pattern,str,1,default or false) + else + return default or false, "" + end +end + +-- print(utilities.parsers.splitmethod(" foo -> {bar} ")) +-- print(utilities.parsers.splitmethod("foo->{bar}")) +-- print(utilities.parsers.splitmethod("foo->bar")) +-- print(utilities.parsers.splitmethod("foo")) +-- print(utilities.parsers.splitmethod("{foo}")) +-- print(utilities.parsers.splitmethod()) diff --git a/tex/context/base/util-sbx.lua b/tex/context/base/util-sbx.lua new file mode 100644 index 000000000..260e8b3b5 --- /dev/null +++ b/tex/context/base/util-sbx.lua @@ -0,0 +1,415 @@ +if not modules then modules = { } end modules ['util-sbx'] = { + version = 1.001, + comment = "companion to luat-lib.mkiv", + author = 
"Hans Hagen, PRAGMA-ADE, Hasselt NL", + copyright = "PRAGMA ADE / ConTeXt Development Team", + license = "see context related readme files" +} + +-- Note: we use expandname and collapsepath and these use chdir +-- which is overloaded so we need to use originals there. Just +-- something to keep in mind. + +if not sandbox then require("l-sandbox") end -- for testing + +local next, type = next, type + +local replace = utilities.templates.replace +local collapsepath = file.collapsepath +local expandname = dir.expandname +local sortedhash = table.sortedhash +local lpegmatch = lpeg.match +local platform = os.type +local P, S, C = lpeg.P, lpeg.S, lpeg.C +local gsub = string.gsub +local lower = string.lower +local unquoted = string.unquoted +local optionalquoted = string.optionalquoted + +local sandbox = sandbox +local validroots = { } +local validrunners = { } +local validbinaries = { } +local validators = { } +local p_validroot = nil +local finalized = nil +local norunners = false +local trace = false +local p_split = lpeg.tsplitat(" ") -- more spaces? 
+ +local report = logs.reporter("sandbox") + +trackers.register("sandbox",function(v) trace = v end) -- often too late anyway + +sandbox.setreporter(report) + +sandbox.finalizer(function() + finalized = true +end) + +local function registerroot(root,what) -- what == read|write + if finalized then + report("roots are already finalized") + else + root = collapsepath(expandname(root)) + if platform == "windows" then + root = lower(root) -- we assume ascii names + end + -- true: read & write | false: read + validroots[root] = what == "write" or false + end +end + +sandbox.finalizer(function() -- initializers can set the path + if p_validroot then + report("roots are already initialized") + else + sandbox.registerroot(".","write") -- always ok + -- also register texmf as read + for name in sortedhash(validroots) do + if p_validroot then + p_validroot = P(name) + p_validroot + else + p_validroot = P(name) + end + end + p_validroot = p_validroot / validroots + end +end) + +local function registerrunner(specification) + if finalized then + report("runners are already finalized") + else + local name = specification.name + if not name then + report("no runner name specified") + return + end + local program = specification.program + if type(program) == "string" then + -- common for all platforms + elseif type(program) == "table" then + program = program[platform] + end + if type(program) ~= "string" or program == "" then + report("invalid runner %a specified for platform %a",name,platform) + return + end + specification.program = program + validrunners[name] = specification + end +end + +local function registerbinary(name) + if finalized then + report("binaries are already finalized") + elseif type(name) == "string" and name ~= "" then + validbinaries[name] = true + end +end + +-- begin of validators + +local p_write = S("wa") p_write = (1 - p_write)^0 * p_write +local p_path = S("\\/~$%:") p_path = (1 - p_path )^0 * p_path -- be easy on other arguments + +local function 
normalized(name) -- only used in executers + if platform == "windows" then + name = gsub(name,"/","\\") + end + return name +end + +function sandbox.possiblepath(name) + return lpegmatch(p_path,name) and true or false +end + +local filenamelogger = false + +function sandbox.setfilenamelogger(l) + filenamelogger = type(l) == "function" and l or false +end + +local function validfilename(name,what) + if p_validroot and type(name) == "string" and lpegmatch(p_path,name) then + local asked = collapsepath(expandname(name)) + if platform == "windows" then + asked = lower(asked) -- we assume ascii names + end + local okay = lpegmatch(p_validroot,asked) + if okay == true then + -- read and write access + if filenamelogger then + filenamelogger(name,"w",asked,true) + end + return name + elseif okay == false then + -- read only access + if not what then + -- no further argument to io.open so a readonly case + if filenamelogger then + filenamelogger(name,"r",asked,true) + end + return name + elseif lpegmatch(p_write,what) then + if filenamelogger then + filenamelogger(name,"w",asked,false) + end + return -- we want write access + else + if filenamelogger then + filenamelogger(name,"r",asked,true) + end + return name + end + else + if filenamelogger then + filenamelogger(name,"*",name,false) + end + end + else + return name + end +end + +local function readable(name) + if platform == "windows" then + name = lower(name) -- we assume ascii names + end + local valid = validfilename(name,"r") + if valid then + return normalized(valid) + end +end + +local function writeable(name) + if platform == "windows" then + name = lower(name) -- we assume ascii names + end + local valid = validfilename(name,"w") + if valid then + return normalized(valid) + end +end + +validators.readable = readable +validators.writeable = writeable +validators.filename = readable + +table.setmetatableindex(validators,function(t,k) + if k then + t[k] = readable + end + return readable +end) + +function 
validators.string(s) + return s -- can be used to prevent filename checking +end + +-- end of validators + +sandbox.registerroot = registerroot +sandbox.registerrunner = registerrunner +sandbox.registerbinary = registerbinary +sandbox.validfilename = validfilename + +local function filehandlerone(action,one,...) + local checkedone = validfilename(one) + if checkedone then + return action(one,...) + else +-- report("file %a is unreachable",one) + end +end + +local function filehandlertwo(action,one,two,...) + local checkedone = validfilename(one) + if checkedone then + local checkedtwo = validfilename(two) + if checkedtwo then + return action(one,two,...) + else +-- report("file %a is unreachable",two) + end + else +-- report("file %a is unreachable",one) + end +end + +local function iohandler(action,one,...) + if type(one) == "string" then + local checkedone = validfilename(one) + if checkedone then + return action(one,...) + end + elseif one then + return action(one,...) + else + return action() + end +end + +-- runners can be strings or tables +-- +-- os.execute : string +-- os.exec : table with program in [0|1] +-- os.spawn : table with program in [0|1] +-- +-- our execute: registered program with specification + +local function runhandler(action,name,specification) + local kind = type(name) + if kind ~= "string" then + return + end + if norunners then + report("no runners permitted, ignoring command: %s",name) + return + end + local spec = validrunners[name] + if not spec then + report("unknown runner: %s",name) + return + end + -- specs are already checked + local program = spec.program + local variables = { } + local checkers = spec.checkers or { } + if specification then + -- we only handle runners that are defined before the sandbox is + -- closed so in principle we cannot have user runs with no files + -- while for context runners we assume a robust specification + for k, v in next, specification do + local checker = validators[checkers[k]] + local value = 
checker(unquoted(v)) -- todo: write checkers + if value then + variables[k] = optionalquoted(value) + else + report("suspicious argument found, run blocked: %s",v) + return + end + end + end + local command = replace(program,variables) + if trace then + report("executing runner: %s",command) + end + return action(command) +end + +-- only registered (from list) -- no checking on writable so let's assume harmless +-- runs + +local function binaryhandler(action,name) + local kind = type(name) + local list = name + if kind == "string" then + list = lpegmatch(p_split,name) + end + local program = name[0] or name[1] + if type(program) ~= "string" or program == "" then + return --silently ignore + end + if norunners then + report("no binaries permitted, ignoring command: %s",program) + return + end + if not validbinaries[program] then + report("binary is not permitted: %s",program) + return + end + for i=0,#list do + local n = list[i] + if n then + local v = readable(unquoted(n)) + if v then + list[i] = optionalquoted(v) + else + report("suspicious argument found, run blocked: %s",n) + return + end + end + end + return action(name) +end + +sandbox.filehandlerone = filehandlerone +sandbox.filehandlertwo = filehandlertwo +sandbox.iohandler = iohandler +sandbox.runhandler = runhandler +sandbox.binaryhandler = binaryhandler + +function sandbox.disablerunners() + norunners = true +end + +local execute = sandbox.original(os.execute) + +function sandbox.run(name,specification) + return runhandler(execute,name,specification) +end + +------------------- + +local overload = sandbox.overload +local register = sandbox.register + + overload(loadfile, filehandlerone,"loadfile") -- todo + +if io then + overload(io.open, filehandlerone,"io.open") + overload(io.popen, filehandlerone,"io.popen") + overload(io.input, iohandler, "io.input") + overload(io.output, iohandler, "io.output") + overload(io.lines, filehandlerone,"io.lines") +end + +if os then + overload(os.execute, binaryhandler, 
"os.execute") + overload(os.spawn, binaryhandler, "os.spawn") + overload(os.exec, binaryhandler, "os.exec") + overload(os.rename, filehandlertwo,"os.rename") + overload(os.remove, filehandlerone,"os.remove") +end + +if lfs then + overload(lfs.chdir, filehandlerone,"lfs.chdir") + overload(lfs.mkdir, filehandlerone,"lfs.mkdir") + overload(lfs.rmdir, filehandlerone,"lfs.rmdir") + overload(lfs.isfile, filehandlerone,"lfs.isfile") + overload(lfs.isdir, filehandlerone,"lfs.isdir") + overload(lfs.attributes, filehandlerone,"lfs.attributes") + overload(lfs.dir, filehandlerone,"lfs.dir") + overload(lfs.lock_dir, filehandlerone,"lfs.lock_dir") + overload(lfs.touch, filehandlerone,"lfs.touch") + overload(lfs.link, filehandlertwo,"lfs.link") + overload(lfs.setmode, filehandlerone,"lfs.setmode") + overload(lfs.readlink, filehandlerone,"lfs.readlink") + overload(lfs.shortname, filehandlerone,"lfs.shortname") + overload(lfs.symlinkattributes,filehandlerone,"lfs.symlinkattributes") +end + +-- these are used later on + +if zip then + zip.open = register(zip.open, filehandlerone,"zip.open") +end + +if fontloader then + fontloader.open = register(fontloader.open,filehandlerone,"fontloader.open") + fontloader.info = register(fontloader.info,filehandlerone,"fontloader.info") +end + +if epdf then + epdf.open = register(epdf.open, filehandlerone,"epdf.open") +end + +-- not used in a normal mkiv run : os.spawn = os.execute +-- not used in a normal mkiv run : os.exec = os.exec + +-- print(io.open("test.log")) +-- sandbox.enable() +-- print(io.open("test.log")) +-- print(io.open("t:/test.log")) diff --git a/tex/context/base/util-sci.lua b/tex/context/base/util-sci.lua new file mode 100644 index 000000000..c3e24cd9d --- /dev/null +++ b/tex/context/base/util-sci.lua @@ -0,0 +1,280 @@ +local gsub, sub, find = string.gsub, string.sub, string.find +local concat = table.concat +local formatters = string.formatters +local lpegmatch = lpeg.match +local setmetatableindex = table.setmetatableindex + 
+local scite = scite or { } +utilities.scite = scite + +local report = logs.reporter("scite") + +local lexerroot = file.dirname(resolvers.find_file("scite-context-lexer.lua")) + +local knownlexers = { + tex = "tex", mkiv = "tex", mkvi = "tex", mkxi = "tex", mkix = "tex", mkii = "tex", cld = "tex", + lua = "lua", lfg = "lua", lus = "lua", + w = "web", ww = "web", + c = "cpp", h = "cpp", cpp = "cpp", hpp = "cpp", cxx = "cpp", hxx = "cpp", + xml = "xml", lmx = "xml", ctx = "xml", xsl = "xml", xsd = "xml", rlx = "xml", css = "xml", dtd = "xml", + bib = "bibtex", + rme = "txt", + -- todo: pat/hyp ori +} + +lexer = nil -- main lexer, global (for the moment needed for themes) + +local function loadscitelexer() + if not lexer then + dir.push(lexerroot) + lexer = dofile("scite-context-lexer.lua") + dofile("themes/scite-context-theme.lua") + dir.pop() + end + return lexer +end + +local loadedlexers = setmetatableindex(function(t,k) + local l = knownlexers[k] or k + dir.push(lexerroot) + loadscitelexer() + local v = lexer.load(formatters["scite-context-lexer-%s"](l)) + dir.pop() + t[l] = v + t[k] = v + return v +end) + +scite.loadedlexers = loadedlexers +scite.knownlexers = knownlexers +scite.loadscitelexer = loadscitelexer + +local f_fore_bold = formatters['.%s { display: inline ; font-weight: bold ; color: #%s%s%s ; }'] +local f_fore_none = formatters['.%s { display: inline ; font-weight: normal ; color: #%s%s%s ; }'] +local f_none_bold = formatters['.%s { display: inline ; font-weight: bold ; }'] +local f_none_none = formatters['.%s { display: inline ; font-weight: normal ; }'] +local f_div_class = formatters['%linenumbers% |
+ %lexedcontent% |
+
+%dirlist% ++ + +]]) + +function scite.converttree(sourceroot,targetroot,numbered) + if lfs.isdir(sourceroot) then + statistics.starttiming() + local skipped = { } + local noffiles = 0 + dir.makedirs(targetroot) + local function scan(sourceroot,targetroot,subpath) + local tree = { } + for name in lfs.dir(sourceroot) do + if name ~= "." and name ~= ".." then + local sourcename = file.join(sourceroot,name) + local targetname = file.join(targetroot,name) + local mode = lfs.attributes(sourcename,'mode') + local path = subpath and file.join(subpath,name) or name + if mode == 'file' then + local filetype = file.suffix(sourcename) + local basename = file.basename(name) + local targetname = maketargetname(targetname) + local fullname = file.join(path,name) + if knownlexers[filetype] then + report("converting file %a to %a",sourcename,targetname) + scite.filetohtml(sourcename,nil,targetname,numbered,fullname) + noffiles = noffiles + 1 + tree[#tree+1] = f_tree_entry(file.basename(targetname),basename) + else + skipped[filetype] = true + report("no lexer for %a",sourcename) + end + else + dir.makedirs(targetname) + scan(sourcename,targetname,path) + tree[#tree+1] = f_tree_entry(file.join(name,"files.html"),name) + end + end + end + report("saving tree in %a",targetroot) + local htmldata = htmlfile { + dirlist = concat(tree,"\n"), + styles = "", + title = path or "context dir listing", + } + io.savedata(file.join(targetroot,"files.html"),htmldata) + end + scan(sourceroot,targetroot) + if next(skipped) then + report("skipped filetypes: %a",table.concat(table.sortedkeys(skipped)," ")) + end + statistics.stoptiming() + report("conversion time for %s files: %s",noffiles,statistics.elapsedtime()) + end +end + +-- scite.filetohtml("strc-sec.mkiv",nil,"e:/tmp/util-sci.html",true) +-- scite.filetohtml("syst-aux.mkiv",nil,"e:/tmp/util-sci.html",true) + +-- scite.converttree("t:/texmf/tex/context","e:/tmp/html/context",true) + +return scite diff --git a/tex/context/base/util-seq.lua 
b/tex/context/base/util-seq.lua index 35e693285..08fc4e95c 100644 --- a/tex/context/base/util-seq.lua +++ b/tex/context/base/util-seq.lua @@ -17,13 +17,15 @@ use locals to refer to them when compiling the chain. -- todo: protect groups (as in tasks) -local format, gsub, concat, gmatch = string.format, string.gsub, table.concat, string.gmatch +local gsub, concat, gmatch = string.gsub, table.concat, string.gmatch local type, load = type, load utilities = utilities or { } local tables = utilities.tables local allocate = utilities.storage.allocate +local formatters = string.formatters + local sequencers = { } utilities.sequencers = sequencers @@ -31,6 +33,7 @@ local functions = allocate() sequencers.functions = functions local removevalue = tables.removevalue +local replacevalue = tables.replacevalue local insertaftervalue = tables.insertaftervalue local insertbeforevalue = tables.insertbeforevalue @@ -189,6 +192,18 @@ function sequencers.removeaction(t,group,action,force) end end +function sequencers.replaceaction(t,group,oldaction,newaction,force) + t = known[t] + if t then + local g = t.list[group] + if g and (force or validaction(oldaction)) then + replacevalue(g,oldaction,newaction) + t.dirty = true + t.runner = nil + end + end +end + local function localize(str) return (gsub(str,"[%.: ]+","_")) end @@ -204,20 +219,23 @@ local function construct(t) for i=1,#actions do local action = actions[i] if not askip[action] then + local localized if type(action) == "function" then local name = localize(tostring(action)) functions[name] = action - action = format("utilities.sequencers.functions.%s",name) + action = formatters["utilities.sequencers.functions.%s"](name) + localized = localize(name) -- shorter than action + else + localized = localize(action) end - local localized = localize(action) n = n + 1 - variables[n] = format("local %s = %s",localized,action) + variables[n] = formatters["local %s = %s"](localized,action) if not returnvalues then - calls[n] = 
format("%s(%s)",localized,arguments) + calls[n] = formatters["%s(%s)"](localized,arguments) elseif n == 1 then - calls[n] = format("local %s = %s(%s)",returnvalues,localized,arguments) + calls[n] = formatters["local %s = %s(%s)"](returnvalues,localized,arguments) else - calls[n] = format("%s = %s(%s)",returnvalues,localized,arguments) + calls[n] = formatters["%s = %s(%s)"](returnvalues,localized,arguments) end end end @@ -230,9 +248,9 @@ local function construct(t) variables = concat(variables,"\n") calls = concat(calls,"\n") if results then - t.compiled = format("%s\nreturn function(%s)\n%s\nreturn %s\nend",variables,arguments,calls,results) + t.compiled = formatters["%s\nreturn function(%s)\n%s\nreturn %s\nend"](variables,arguments,calls,results) else - t.compiled = format("%s\nreturn function(%s)\n%s\nend",variables,arguments,calls) + t.compiled = formatters["%s\nreturn function(%s)\n%s\nend"](variables,arguments,calls) end end -- print(t.compiled) @@ -258,6 +276,7 @@ compile = function(t,compiler,n) -- already referred to in sequencers.new if compiled == "" then runner = false else +-- inspect(compiled) runner = compiled and load(compiled)() -- we can use loadstripped here end t.runner = runner @@ -314,12 +333,12 @@ function sequencers.nodeprocessor(t,nofarguments) -- todo: handle 'kind' in plug if not askip[action] then local localized = localize(action) n = n + 1 - vars[n] = format("local %s = %s",localized,action) + vars[n] = formatters["local %s = %s"](localized,action) -- only difference with tostring is kind and rets (why no return) if kind[action] == "nohead" then - calls[n] = format(" ok = %s(head%s) done = done or ok",localized,args) + calls[n] = formatters[" ok = %s(head%s) done = done or ok"](localized,args) else - calls[n] = format(" head, ok = %s(head%s) done = done or ok",localized,args) + calls[n] = formatters[" head, ok = %s(head%s) done = done or ok"](localized,args) end -- local s = " print('" .. tostring(group) .. " " .. tostring(action) .. 
" : ' .. tostring(head)) " -- calls[n] = s .. calls[n] .. s @@ -327,6 +346,6 @@ function sequencers.nodeprocessor(t,nofarguments) -- todo: handle 'kind' in plug end end end - local processor = #calls > 0 and format(template_yes,concat(vars,"\n"),args,concat(calls,"\n")) or template_nop + local processor = #calls > 0 and formatters[template_yes](concat(vars,"\n"),args,concat(calls,"\n")) or template_nop return processor end diff --git a/tex/context/base/util-sql-loggers.lua b/tex/context/base/util-sql-loggers.lua index 7fceb8032..ceb1ff75c 100644 --- a/tex/context/base/util-sql-loggers.lua +++ b/tex/context/base/util-sql-loggers.lua @@ -52,7 +52,7 @@ table.setmetatableindex(fromtype,function() return "info" end) loggers.totype = totype loggers.fromtype = fromtype -local template =[[ +local template = [[ CREATE TABLE IF NOT EXISTS %basename% ( `id` int(11) NOT NULL AUTO_INCREMENT, `time` int(11) NOT NULL, diff --git a/tex/context/base/util-sta.lua b/tex/context/base/util-sta.lua index 1a61ec4e6..27ab5a624 100644 --- a/tex/context/base/util-sta.lua +++ b/tex/context/base/util-sta.lua @@ -81,6 +81,8 @@ end function stacker.new(name) + local report = logs.reporter("stacker",name or nil) + local s local stack = { } @@ -126,8 +128,18 @@ function stacker.new(name) end end - local tops = { } - local top, switch + local tops = { } + local top = nil + local switch = nil + + local function resolve_reset(mode) + if #tops > 0 then + report("resetting %s left-over states of %a",#tops,name) + end + tops = { } + top = nil + switch = nil + end local function resolve_begin(mode) if mode then @@ -206,8 +218,7 @@ function stacker.new(name) local function resolve_end() -- resolve_step(s.unset) - local noftop = #top - if noftop > 0 then + if #tops > 0 then -- was #top brrr local result = s.stop(s,top,1,#top) remove(tops) top = tops[#tops] @@ -224,8 +235,6 @@ function stacker.new(name) resolve_end() end - local report = logs.reporter("stacker",name or nil) - s = { name = name or 
"unknown", unset = -1, @@ -240,6 +249,7 @@ function stacker.new(name) resolve_begin = resolve_begin, resolve_step = resolve_step, resolve_end = resolve_end, + resolve_reset = resolve_reset, } return s -- we can overload functions diff --git a/tex/context/base/util-str.lua b/tex/context/base/util-str.lua index af8b1651e..de4a87e9f 100644 --- a/tex/context/base/util-str.lua +++ b/tex/context/base/util-str.lua @@ -20,21 +20,44 @@ local utfchar, utfbyte = utf.char, utf.byte ----- loadstripped = utilities.lua.loadstripped ----- setmetatableindex = table.setmetatableindex -local loadstripped = _LUAVERSION < 5.2 and load or function(str) - return load(dump(load(str),true)) -- it only makes sense in luajit and luatex where we have a stipped load +local loadstripped = nil + +if _LUAVERSION < 5.2 then + + loadstripped = function(str,shortcuts) + return load(str) + end + +else + + loadstripped = function(str,shortcuts) + if shortcuts then + return load(dump(load(str),true),nil,nil,shortcuts) + else + return load(dump(load(str),true)) + end + end + end -- todo: make a special namespace for the formatter if not number then number = { } end -- temp hack for luatex-fonts -local stripper = patterns.stripzeros +local stripper = patterns.stripzeros +local newline = patterns.newline +local endofstring = patterns.endofstring +local whitespace = patterns.whitespace +local spacer = patterns.spacer +local spaceortab = patterns.spaceortab local function points(n) + n = tonumber(n) return (not n or n == 0) and "0pt" or lpegmatch(stripper,format("%.5fpt",n/65536)) end local function basepoints(n) + n = tonumber(n) return (not n or n == 0) and "0bp" or lpegmatch(stripper,format("%.5fbp", n*(7200/7227)/65536)) end @@ -44,12 +67,12 @@ number.basepoints = basepoints -- str = " \n \ntest \n test\ntest " -- print("["..string.gsub(string.collapsecrlf(str),"\n","+").."]") -local rubish = patterns.spaceortab^0 * patterns.newline -local anyrubish = patterns.spaceortab + patterns.newline +local rubish 
= spaceortab^0 * newline +local anyrubish = spaceortab + newline local anything = patterns.anything -local stripped = (patterns.spaceortab^1 / "") * patterns.newline +local stripped = (spaceortab^1 / "") * newline local leading = rubish^0 / "" -local trailing = (anyrubish^1 * patterns.endofstring) / "" +local trailing = (anyrubish^1 * endofstring) / "" local redundant = rubish^3 / "\n" local pattern = Cs(leading * (trailing + redundant + stripped + anything)^0) @@ -111,7 +134,7 @@ local pattern = return "" end end - + patterns.newline * Cp() / function(position) + + newline * Cp() / function(position) extra, start = 0, position end + patterns.anything @@ -136,17 +159,105 @@ end -- print(strings.tabtospace(t[k])) -- end -function strings.striplong(str) -- strips all leading spaces - str = gsub(str,"^%s*","") - str = gsub(str,"[\n\r]+ *","\n") - return str +-- todo: lpeg + +-- function strings.striplong(str) -- strips all leading spaces +-- str = gsub(str,"^%s*","") +-- str = gsub(str,"[\n\r]+ *","\n") +-- return str +-- end + +local space = spacer^0 +local nospace = space/"" +local endofline = nospace * newline + +local stripend = (whitespace^1 * endofstring)/"" + +local normalline = (nospace * ((1-space*(newline+endofstring))^1) * nospace) + +local stripempty = endofline^1/"" +local normalempty = endofline^1 +local singleempty = endofline * (endofline^0/"") +local doubleempty = endofline * endofline^-1 * (endofline^0/"") + +local stripstart = stripempty^0 + +local p_prune_normal = Cs ( stripstart * ( stripend + normalline + normalempty )^0 ) +local p_prune_collapse = Cs ( stripstart * ( stripend + normalline + doubleempty )^0 ) +local p_prune_noempty = Cs ( stripstart * ( stripend + normalline + singleempty )^0 ) +local p_retain_normal = Cs ( ( normalline + normalempty )^0 ) +local p_retain_collapse = Cs ( ( normalline + doubleempty )^0 ) +local p_retain_noempty = Cs ( ( normalline + singleempty )^0 ) + +-- function striplines(str,prune,collapse,noempty) +-- if 
prune then +-- if noempty then +-- return lpegmatch(p_prune_noempty,str) or str +-- elseif collapse then +-- return lpegmatch(p_prune_collapse,str) or str +-- else +-- return lpegmatch(p_prune_normal,str) or str +-- end +-- else +-- if noempty then +-- return lpegmatch(p_retain_noempty,str) or str +-- elseif collapse then +-- return lpegmatch(p_retain_collapse,str) or str +-- else +-- return lpegmatch(p_retain_normal,str) or str +-- end +-- end +-- end + +local striplinepatterns = { + ["prune"] = p_prune_normal, + ["prune and collapse"] = p_prune_collapse, -- default + ["prune and no empty"] = p_prune_noempty, + ["retain"] = p_retain_normal, + ["retain and collapse"] = p_retain_collapse, + ["retain and no empty"] = p_retain_noempty, + ["collapse"] = patterns.collapser, -- how about: stripper fullstripper +} + +setmetatable(striplinepatterns,{ __index = function(t,k) return p_prune_collapse end }) + +strings.striplinepatterns = striplinepatterns + +function strings.striplines(str,how) + return str and lpegmatch(striplinepatterns[how],str) or str end --- local template = string.striplong([[ +-- also see: string.collapsespaces + +strings.striplong = strings.striplines -- for old times sake + +-- local str = table.concat( { +-- " ", +-- " aap", +-- " noot mies", +-- " ", +-- " ", +-- " zus wim jet", +-- "zus wim jet", +-- " zus wim jet", +-- " ", +-- }, "\n") + +-- local str = table.concat( { +-- " aaaa", +-- " bb", +-- " cccccc", +-- }, "\n") + +-- for k, v in table.sortedhash(utilities.strings.striplinepatterns) do +-- logs.report("stripper","method: %s, result: [[%s]]",k,utilities.strings.striplines(str,k)) +-- end + +-- inspect(strings.striplong([[ -- aaaa -- bb -- cccccc --- ]]) +-- ]])) function strings.nice(str) str = gsub(str,"[:%-+_]+"," ") -- maybe more @@ -178,6 +289,7 @@ end -- octal %...o number -- string %...s string number -- float %...f number +-- checked float %...F number -- exponential %...e number -- exponential %...E number -- autofloat %...g 
number @@ -249,10 +361,10 @@ strings.tracers = tracedchars function string.tracedchar(b) -- todo: table if type(b) == "number" then - return tracedchars[b] or (utfchar(b) .. " (U+" .. format('%05X',b) .. ")") + return tracedchars[b] or (utfchar(b) .. " (U+" .. format("%05X",b) .. ")") else local c = utfbyte(b) - return tracedchars[c] or (b .. " (U+" .. format('%05X',c) .. ")") + return tracedchars[c] or (b .. " (U+" .. (c and format("%05X",c) or "?????") .. ")") end end @@ -291,33 +403,67 @@ function number.sparseexponent(f,n) return tostring(n) end -local preamble = [[ -local type = type -local tostring = tostring -local tonumber = tonumber -local format = string.format -local concat = table.concat -local signed = number.signed -local points = number.points -local basepoints = number.basepoints -local utfchar = utf.char -local utfbyte = utf.byte -local lpegmatch = lpeg.match -local nspaces = string.nspaces -local tracedchar = string.tracedchar -local autosingle = string.autosingle -local autodouble = string.autodouble -local sequenced = table.sequenced -local formattednumber = number.formatted -local sparseexponent = number.sparseexponent -]] - local template = [[ %s %s return function(%s) return %s end ]] +local preamble, environment = "", { } + +if _LUAVERSION < 5.2 then + + preamble = [[ +local lpeg=lpeg +local type=type +local tostring=tostring +local tonumber=tonumber +local format=string.format +local concat=table.concat +local signed=number.signed +local points=number.points +local basepoints= number.basepoints +local utfchar=utf.char +local utfbyte=utf.byte +local lpegmatch=lpeg.match +local nspaces=string.nspaces +local tracedchar=string.tracedchar +local autosingle=string.autosingle +local autodouble=string.autodouble +local sequenced=table.sequenced +local formattednumber=number.formatted +local sparseexponent=number.sparseexponent + ]] + +else + + environment = { + global = global or _G, + lpeg = lpeg, + type = type, + tostring = tostring, + tonumber = 
tonumber, + format = string.format, + concat = table.concat, + signed = number.signed, + points = number.points, + basepoints = number.basepoints, + utfchar = utf.char, + utfbyte = utf.byte, + lpegmatch = lpeg.match, + nspaces = string.nspaces, + tracedchar = string.tracedchar, + autosingle = string.autosingle, + autodouble = string.autodouble, + sequenced = table.sequenced, + formattednumber = number.formatted, + sparseexponent = number.sparseexponent, + } + +end + +-- -- -- + local arguments = { "a1" } -- faster than previously used (select(n,...)) setmetatable(arguments, { __index = @@ -368,7 +514,7 @@ local format_i = function(f) if f and f ~= "" then return format("format('%%%si',a%s)",f,n) else - return format("format('%%i',a%s)",n) + return format("format('%%i',a%s)",n) -- why not just tostring() end end @@ -384,6 +530,24 @@ local format_f = function(f) return format("format('%%%sf',a%s)",f,n) end +-- The next one formats an integer as integer and very small values as zero. This is needed +-- for pdf backend code. +-- +-- 1.23 % 1 : 0.23 +-- - 1.23 % 1 : 0.77 +-- +-- We could probably use just %s with integers but who knows what Lua 5.3 will do? So let's +-- for the moment use %i. 
+ +local format_F = function(f) -- beware, no cast to number + n = n + 1 + if not f or f == "" then + return format("(((a%s > -0.0000000005 and a%s < 0.0000000005) and '0') or format((a%s %% 1 == 0) and '%%i' or '%%.9f',a%s))",n,n,n,n) + else + return format("format((a%s %% 1 == 0) and '%%i' or '%%%sf',a%s)",n,f,n) + end +end + local format_g = function(f) n = n + 1 return format("format('%%%sg',a%s)",f,n) @@ -657,7 +821,7 @@ local builder = Cs { "start", V("!") -- new + V("s") + V("q") + V("i") + V("d") - + V("f") + V("g") + V("G") + V("e") + V("E") + + V("f") + V("F") + V("g") + V("G") + V("e") + V("E") + V("x") + V("X") + V("o") -- + V("c") @@ -680,7 +844,7 @@ local builder = Cs { "start", + V("m") + V("M") -- new + V("z") -- new -- - + V("*") -- ignores probably messed up % + -- + V("?") -- ignores probably messed up % ) + V("*") ) @@ -692,6 +856,7 @@ local builder = Cs { "start", ["i"] = (prefix_any * P("i")) / format_i, -- %i => regular %i (integer) ["d"] = (prefix_any * P("d")) / format_d, -- %d => regular %d (integer) ["f"] = (prefix_any * P("f")) / format_f, -- %f => regular %f (float) + ["F"] = (prefix_any * P("F")) / format_F, -- %F => regular %f (float) but 0/1 check ["g"] = (prefix_any * P("g")) / format_g, -- %g => regular %g (float) ["G"] = (prefix_any * P("G")) / format_G, -- %G => regular %G (float) ["e"] = (prefix_any * P("e")) / format_e, -- %e => regular %e (float) @@ -734,34 +899,45 @@ local builder = Cs { "start", ["A"] = (prefix_any * P("A")) / format_A, -- %A => "..." 
(forces tostring) -- ["*"] = Cs(((1-P("%"))^1 + P("%%")/"%%")^1) / format_rest, -- rest (including %%) + ["?"] = Cs(((1-P("%"))^1 )^1) / format_rest, -- rest (including %%) -- ["!"] = Carg(2) * prefix_any * P("!") * C((1-P("!"))^1) * P("!") / format_extension, } -- we can be clever and only alias what is needed +-- local direct = Cs ( +-- P("%")/"" +-- * Cc([[local format = string.format return function(str) return format("%]]) +-- * (S("+- .") + R("09"))^0 +-- * S("sqidfgGeExXo") +-- * Cc([[",str) end]]) +-- * P(-1) +-- ) + local direct = Cs ( - P("%")/"" - * Cc([[local format = string.format return function(str) return format("%]]) - * (S("+- .") + R("09"))^0 - * S("sqidfgGeExXo") - * Cc([[",str) end]]) - * P(-1) - ) + P("%") + * (S("+- .") + R("09"))^0 + * S("sqidfgGeExXo") + * P(-1) / [[local format = string.format return function(str) return format("%0",str) end]] +) local function make(t,str) local f local p local p = lpegmatch(direct,str) if p then + -- f = loadstripped(p)() + -- print("builder 1 >",p) f = loadstripped(p)() else n = 0 - p = lpegmatch(builder,str,1,"..",t._extensions_) -- after this we know n + -- p = lpegmatch(builder,str,1,"..",t._extensions_) -- after this we know n + p = lpegmatch(builder,str,1,t._connector_,t._extensions_) -- after this we know n if n > 0 then p = format(template,preamble,t._preamble_,arguments[n],p) --- print("builder>",p) - f = loadstripped(p)() + -- print("builder 2 >",p) + f = loadstripped(p,t._environment_)() -- t._environment is not populated (was experiment) else f = function() return str end end @@ -816,10 +992,28 @@ strings.formatters = { } -- table (metatable) in which case we could better keep a count and -- clear that table when a threshold is reached -function strings.formatters.new() - local t = { _extensions_ = { }, _preamble_ = "", _type_ = "formatter" } - setmetatable(t, { __index = make, __call = use }) - return t +-- _connector_ is an experiment + +if _LUAVERSION < 5.2 then + + function 
strings.formatters.new(noconcat) + local t = { _type_ = "formatter", _connector_ = noconcat and "," or "..", _extensions_ = { }, _preamble_ = preamble, _environment_ = { } } + setmetatable(t, { __index = make, __call = use }) + return t + end + +else + + function strings.formatters.new(noconcat) + local e = { } -- better make a copy as we can overload + for k, v in next, environment do + e[k] = v + end + local t = { _type_ = "formatter", _connector_ = noconcat and "," or "..", _extensions_ = { }, _preamble_ = "", _environment_ = e } + setmetatable(t, { __index = make, __call = use }) + return t + end + end -- function strings.formatters.new() @@ -838,8 +1032,12 @@ string.formatter = function(str,...) return formatters[str](...) end -- someti local function add(t,name,template,preamble) if type(t) == "table" and t._type_ == "formatter" then t._extensions_[name] = template or "%s" - if preamble then + if type(preamble) == "string" then t._preamble_ = preamble .. "\n" .. t._preamble_ -- so no overload ! 
+ elseif type(preamble) == "table" then + for k, v in next, preamble do + t._environment_[k] = v + end end end end @@ -856,9 +1054,23 @@ patterns.luaquoted = Cs(Cc('"') * ((1-S('"\n'))^1 + P('"')/'\\"' + P('\n')/'\\n" -- escaping by lpeg is faster for strings without quotes, slower on a string with quotes, but -- faster again when other q-escapables are found (the ones we don't need to escape) -add(formatters,"xml", [[lpegmatch(xmlescape,%s)]],[[local xmlescape = lpeg.patterns.xmlescape]]) -add(formatters,"tex", [[lpegmatch(texescape,%s)]],[[local texescape = lpeg.patterns.texescape]]) -add(formatters,"lua", [[lpegmatch(luaescape,%s)]],[[local luaescape = lpeg.patterns.luaescape]]) +-- add(formatters,"xml", [[lpegmatch(xmlescape,%s)]],[[local xmlescape = lpeg.patterns.xmlescape]]) +-- add(formatters,"tex", [[lpegmatch(texescape,%s)]],[[local texescape = lpeg.patterns.texescape]]) +-- add(formatters,"lua", [[lpegmatch(luaescape,%s)]],[[local luaescape = lpeg.patterns.luaescape]]) + +if _LUAVERSION < 5.2 then + + add(formatters,"xml",[[lpegmatch(xmlescape,%s)]],"local xmlescape = lpeg.patterns.xmlescape") + add(formatters,"tex",[[lpegmatch(texescape,%s)]],"local texescape = lpeg.patterns.texescape") + add(formatters,"lua",[[lpegmatch(luaescape,%s)]],"local luaescape = lpeg.patterns.luaescape") + +else + + add(formatters,"xml",[[lpegmatch(xmlescape,%s)]],{ xmlescape = lpeg.patterns.xmlescape }) + add(formatters,"tex",[[lpegmatch(texescape,%s)]],{ texescape = lpeg.patterns.texescape }) + add(formatters,"lua",[[lpegmatch(luaescape,%s)]],{ luaescape = lpeg.patterns.luaescape }) + +end -- -- yes or no: -- @@ -885,3 +1097,29 @@ add(formatters,"lua", [[lpegmatch(luaescape,%s)]],[[local luaescape = lpeg.patte -- string.formatteds = formatteds -- -- setmetatable(formatteds, { __index = make, __call = use }) + +-- This is a somewhat silly one used in commandline reconstruction but the older +-- method, using a combination of fine, gsub, quoted and unquoted was not that +-- 
reliable. +-- +-- '"foo"bar \"and " whatever"' => "foo\"bar \"and \" whatever" +-- 'foo"bar \"and " whatever' => "foo\"bar \"and \" whatever" + +local dquote = patterns.dquote -- P('"') +local equote = patterns.escaped + dquote / '\\"' + 1 +local space = patterns.space +local cquote = Cc('"') + +local pattern = + Cs(dquote * (equote - P(-2))^0 * dquote) -- we keep the outer but escape unescaped ones + + Cs(cquote * (equote - space)^0 * space * equote^0 * cquote) -- we escape unescaped ones + +function string.optionalquoted(str) + return lpegmatch(pattern,str) or str +end + +local pattern = Cs((newline / os.newline + 1)^0) + +function string.replacenewlines(str) + return lpegmatch(pattern,str) +end diff --git a/tex/context/base/util-tab.lua b/tex/context/base/util-tab.lua index ae44269bb..618f34cee 100644 --- a/tex/context/base/util-tab.lua +++ b/tex/context/base/util-tab.lua @@ -11,7 +11,7 @@ utilities.tables = utilities.tables or { } local tables = utilities.tables local format, gmatch, gsub, sub = string.format, string.gmatch, string.gsub, string.sub -local concat, insert, remove = table.concat, table.insert, table.remove +local concat, insert, remove, sort = table.concat, table.insert, table.remove, table.sort local setmetatable, getmetatable, tonumber, tostring = setmetatable, getmetatable, tonumber, tostring local type, next, rawset, tonumber, tostring, load, select = type, next, rawset, tonumber, tostring, load, select local lpegmatch, P, Cs, Cc = lpeg.match, lpeg.P, lpeg.Cs, lpeg.Cc @@ -21,27 +21,29 @@ local utftoeight = utf.toeight local splitter = lpeg.tsplitat(".") -function tables.definetable(target,nofirst,nolast) -- defines undefined tables - local composed, shortcut, t = nil, nil, { } +function utilities.tables.definetable(target,nofirst,nolast) -- defines undefined tables + local composed, t = nil, { } local snippets = lpegmatch(splitter,target) for i=1,#snippets - (nolast and 1 or 0) do local name = snippets[i] if composed then - composed = shortcut 
.. "." .. name - shortcut = shortcut .. "_" .. name - t[#t+1] = formatters["local %s = %s if not %s then %s = { } %s = %s end"](shortcut,composed,shortcut,shortcut,composed,shortcut) + composed = composed .. "." .. name + t[#t+1] = formatters["if not %s then %s = { } end"](composed,composed) else composed = name - shortcut = name if not nofirst then t[#t+1] = formatters["%s = %s or { }"](composed,composed) end end end - if nolast then - composed = shortcut .. "." .. snippets[#snippets] + if composed then + if nolast then + composed = composed .. "." .. snippets[#snippets] + end + return concat(t,"\n"), composed -- could be shortcut + else + return "", target end - return concat(t,"\n"), composed end -- local t = tables.definedtable("a","b","c","d") @@ -73,7 +75,7 @@ end function tables.migratetable(target,v,root) local t = root or _G - local names = string.split(target,".") + local names = lpegmatch(splitter,target) for i=1,#names-1 do local name = names[i] t[name] = t[name] or { } @@ -96,6 +98,17 @@ function tables.removevalue(t,value) -- todo: n end end +function tables.replacevalue(t,oldvalue,newvalue) + if oldvalue and newvalue then + for i=1,#t do + if t[i] == oldvalue then + t[i] = newvalue + -- replace all, so no: return + end + end + end +end + function tables.insertbeforevalue(t,value,extra) for i=1,#t do if t[i] == extra then @@ -316,7 +329,7 @@ function table.fastserialize(t,prefix) -- not sorted -- only number and string indices (currently) - local r = { prefix or "return" } + local r = { type(prefix) == "string" and prefix or "return" } local m = 1 local function fastserialize(t,outer) -- no mixes @@ -376,7 +389,6 @@ function table.fastserialize(t,prefix) end return r end - return concat(fastserialize(t,true)) end @@ -494,7 +506,8 @@ end -- The next version is somewhat faster, although in practice one will seldom -- serialize a lot using this one. Often the above variants are more efficient. --- If we would really need this a lot, we could hash q keys. 
+-- If we would really need this a lot, we could hash q keys, or just not used +-- indented code. -- char-def.lua : 0.53 -> 0.38 -- husayni.tma : 0.28 -> 0.19 @@ -558,8 +571,42 @@ function table.serialize(root,name,specification) local t -- = { } local n = 1 +-- local function simple_table(t) +-- local ts = #t +-- if ts > 0 then +-- local n = 0 +-- for _, v in next, t do +-- n = n + 1 +-- if type(v) == "table" then +-- return nil +-- end +-- end +-- if n == ts then +-- local tt = { } +-- local nt = 0 +-- for i=1,ts do +-- local v = t[i] +-- local tv = type(v) +-- nt = nt + 1 +-- if tv == "number" then +-- tt[nt] = v +-- elseif tv == "string" then +-- tt[nt] = format("%q",v) -- f_string(v) +-- elseif tv == "boolean" then +-- tt[nt] = v and "true" or "false" +-- else +-- return nil +-- end +-- end +-- return tt +-- end +-- end +-- return nil +-- end + local function simple_table(t) - if #t > 0 then + local nt = #t + if nt > 0 then local n = 0 for _, v in next, t do n = n + 1 @@ -567,19 +614,17 @@ function table.serialize(root,name,specification) return nil end end - if n == #t then + if n == nt then local tt = { } - local nt = 0 - for i=1,#t do + for i=1,nt do local v = t[i] local tv = type(v) - nt = nt + 1 if tv == "number" then - tt[nt] = v + tt[i] = v -- not needed tostring(v) elseif tv == "string" then - tt[nt] = format("%q",v) -- f_string(v) + tt[i] = format("%q",v) -- f_string(v) elseif tv == "boolean" then - tt[nt] = v and "true" or "false" + tt[i] = v and "true" or "false" else return nil end @@ -610,7 +655,7 @@ function table.serialize(root,name,specification) depth = depth + 1 end -- we could check for k (index) being number (cardinal) - if root and next(root) then + if root and next(root) ~= nil then local first = nil local last = 0 last = #root @@ -623,19 +668,19 @@ function table.serialize(root,name,specification) if last > 0 then first = 1 end - local sk = sortedkeys(root) -- inline fast version? 
+ local sk = sortedkeys(root) -- inline fast version?\ for i=1,#sk do local k = sk[i] local v = root[k] local tv = type(v) local tk = type(k) - if first and tk == "number" and k >= first and k <= last then + if first and tk == "number" and k <= last and k >= first then if tv == "number" then n = n + 1 t[n] = f_val_num(depth,v) elseif tv == "string" then n = n + 1 t[n] = f_val_str(depth,v) elseif tv == "table" then - if not next(v) then + if next(v) == nil then n = n + 1 t[n] = f_val_not(depth) else local st = simple_table(v) @@ -665,13 +710,13 @@ function table.serialize(root,name,specification) n = n + 1 t[n] = f_key_boo_value_str(depth,k,v) end elseif tv == "table" then - if not next(v) then + if next(v) == nil then if tk == "number" then - n = n + 1 t[n] = f_key_num_value_not(depth,k,v) + n = n + 1 t[n] = f_key_num_value_not(depth,k) elseif tk == "string" then - n = n + 1 t[n] = f_key_str_value_not(depth,k,v) + n = n + 1 t[n] = f_key_str_value_not(depth,k) elseif tk == "boolean" then - n = n + 1 t[n] = f_key_boo_value_not(depth,k,v) + n = n + 1 t[n] = f_key_boo_value_not(depth,k) end else local st = simple_table(v) @@ -729,7 +774,7 @@ function table.serialize(root,name,specification) root._w_h_a_t_e_v_e_r_ = nil end -- Let's forget about empty tables. - if next(root) then + if next(root) ~= nil then do_serialize(root,name,1,0) end end diff --git a/tex/context/base/util-tpl.lua b/tex/context/base/util-tpl.lua index 67d058221..468dd429c 100644 --- a/tex/context/base/util-tpl.lua +++ b/tex/context/base/util-tpl.lua @@ -52,7 +52,7 @@ local sqlescape = lpeg.replacer { -- { "\t", "\\t" }, } -local sqlquoted = lpeg.Cs(lpeg.Cc("'") * sqlescape * lpeg.Cc("'")) +local sqlquoted = Cs(Cc("'") * sqlescape * Cc("'")) lpegpatterns.sqlescape = sqlescape lpegpatterns.sqlquoted = sqlquoted @@ -111,13 +111,26 @@ local luaescaper = escapers.lua local quotedluaescaper = quotedescapers.lua local function replacekeyunquoted(s,t,how,recurse) -- ".. 
\" " - local escaper = how and escapers[how] or luaescaper - return escaper(replacekey(s,t,how,recurse)) + if how == false then + return replacekey(s,t,how,recurse) + else + local escaper = how and escapers[how] or luaescaper + return escaper(replacekey(s,t,how,recurse)) + end end local function replacekeyquoted(s,t,how,recurse) -- ".. \" " - local escaper = how and quotedescapers[how] or quotedluaescaper - return escaper(replacekey(s,t,how,recurse)) + if how == false then + return replacekey(s,t,how,recurse) + else + local escaper = how and quotedescapers[how] or quotedluaescaper + return escaper(replacekey(s,t,how,recurse)) + end +end + +local function replaceoptional(l,m,r,t,how,recurse) + local v = t[l] + return v and v ~= "" and lpegmatch(replacer,r,1,t,how or "lua",recurse or false) or "" end local single = P("%") -- test %test% test : resolves test @@ -135,12 +148,19 @@ local norquoted = rquoted / '' local nolquotedq = lquotedq / '' local norquotedq = rquotedq / '' -local key = nosingle * ((C((1-nosingle )^1) * Carg(1) * Carg(2) * Carg(3)) / replacekey ) * nosingle -local quoted = nolquotedq * ((C((1-norquotedq)^1) * Carg(1) * Carg(2) * Carg(3)) / replacekeyquoted ) * norquotedq -local unquoted = nolquoted * ((C((1-norquoted )^1) * Carg(1) * Carg(2) * Carg(3)) / replacekeyunquoted) * norquoted +local noloptional = P("%?") / '' +local noroptional = P("?%") / '' +local nomoptional = P(":") / '' + + +local args = Carg(1) * Carg(2) * Carg(3) +local key = nosingle * ((C((1-nosingle )^1) * args) / replacekey ) * nosingle +local quoted = nolquotedq * ((C((1-norquotedq )^1) * args) / replacekeyquoted ) * norquotedq +local unquoted = nolquoted * ((C((1-norquoted )^1) * args) / replacekeyunquoted) * norquoted +local optional = noloptional * ((C((1-nomoptional)^1) * nomoptional * C((1-noroptional)^1) * args) / replaceoptional) * noroptional local any = P(1) - replacer = Cs((unquoted + quoted + escape + key + any)^0) + replacer = Cs((unquoted + quoted + escape + 
optional + key + any)^0) local function replace(str,mapping,how,recurse) if mapping and str then @@ -156,6 +176,7 @@ end -- print(replace("test '%[x]%' test",{ x = [[a '%y%' a]], y = "oeps" },'sql',true)) -- print(replace([[test %[x]% test]],{ x = [[a "x" a]]})) -- print(replace([[test %(x)% test]],{ x = [[a "x" a]]})) +-- print(replace([[convert %?x: -x "%x%" ?% %?y: -y "%y%" ?%]],{ x = "yes" })) templates.replace = replace @@ -188,3 +209,5 @@ end -- inspect(utilities.templates.replace("test %one% test", { one = "%two%", two = "two" })) -- inspect(utilities.templates.resolve({ one = "%two%", two = "two", three = "%three%" })) +-- inspect(utilities.templates.replace("test %one% test", { one = "%two%", two = "two" },false,true)) +-- inspect(utilities.templates.replace("test %one% test", { one = "%two%", two = "two" },false)) diff --git a/tex/context/base/x-asciimath.lua b/tex/context/base/x-asciimath.lua index 992c37eae..51f401e66 100644 --- a/tex/context/base/x-asciimath.lua +++ b/tex/context/base/x-asciimath.lua @@ -7,266 +7,2095 @@ if not modules then modules = { } end modules ['x-asciimath'] = { } --[[ldx-- -
Some backgrounds are discussed in
Some backgrounds are discussed in
test
+ +test
+ +\stopbuffer + +\startbuffer[test 2] + +test (hierna een lf) +test
+ +\stopbuffer + +\startbuffer[test 3] + +test (hierna een lf met lege regel) + +test
+ +\stopbuffer + +\startbuffer[test 4] + +test (hierna een lf met twee lege regels) + + +test
+ +\stopbuffer + +\startbuffer[test 5] + +test (hierna br geen lf)
test
test (hierna br met lf)
+test
test (hierna br met lf en lege regel)
+
+test
test (hierna br met lf en twee lege regels)
+
+
+test
test (hierna bold) bold test
+ +\stopbuffer + +\startbuffer[test 10] + +test (hierna lf met bold) +bold underlined test
+ +\stopbuffer + +\startbuffer[test 11] + +test (hierna lf met lege regel en bold) + +bold test
+ +\stopbuffer + +\startbuffer[test 12] + +test (hierna lf met lege regel en lf in bold) + + +bold + test
+ +\stopbuffer + +\startbuffer[test 13] + +test (hierna lf met lege regel en lf en lege regel in bold) + + + +bold + + test
+ +\stopbuffer + +\dorecurse{13}{\ShowExample{test #1}} + +\stoptext diff --git a/tex/context/base/x-math-svg.lua b/tex/context/base/x-math-svg.lua new file mode 100644 index 000000000..8a6288167 --- /dev/null +++ b/tex/context/base/x-math-svg.lua @@ -0,0 +1,176 @@ +if not modules then modules = { } end modules ['x-math-svg'] = { + version = 1.001, + comment = "companion to x-math-svg.mkiv", + author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", + copyright = "PRAGMA ADE / ConTeXt Development Team", + license = "see context related readme files" +} + +local tostring, type, next = tostring, type, next +local lpegmatch, P, Cs = lpeg.match, lpeg.P, lpeg.Cs + +local xmlfirst = xml.first +local xmlconvert = xml.convert +local xmlload = xml.load +local xmlsave = xml.save +local xmlcollected = xml.collected +local xmldelete = xml.delete + +local loadtable = table.load +local savetable = table.save + +local replacesuffix = file.replacesuffix +local addsuffix = file.addsuffix +local removefile = os.remove +local isfile = lfs.isfile + +local formatters = string.formatters + +moduledata = moduledata or table.setmetatableindex("table") +local svgmath = moduledata.svgmath -- autodefined + +local namedata = { } +local pagedata = { } + +local statusname = "x-math-svg-status.lua" +local pdfname = "x-math-svg.pdf" + +local pdftosvg = os.which("mudraw") + +local f_make_tex = formatters[ [[context --global kpse:x-math-svg.mkvi --inputfile="%s" --svgstyle="%s" --batch --noconsole --once --purgeall]] ] +local f_make_svg = formatters[ [[mudraw -o "math-%%d.svg" "%s" 1-9999]] ] + +----- f_inline = formatters[ [[]] ] +local f_inline = formatters[ [[]] ] +local f_display = formatters[ [[]] ] +local f_style = formatters[ [[vertical-align:%p]] ] + +local f_math_tmp = formatters[ [[math-%i]] ] + +function svgmath.process(filename) + if not filename then + -- no filename given + return + elseif not isfile(filename) then + -- invalid filename + return + end + local index = 0 + local page = 0 + local 
blobs = { } + local root = xmlload(filename) + for mth in xmlcollected(root,"math") do + index = index + 1 + local blob = tostring(mth) + if blobs[blob] then + context.ReuseSVGMath(index,blobs[blob]) + else + page = page + 1 + buffers.assign(f_math_tmp(page),blob) + context.MakeSVGMath(index,page,mth.at.display) + blobs[blob] = page + end + end + context(function() + -- for tracing purposes: + for mathdata, pagenumber in next, blobs do + local p = pagedata[pagenumber] + p.mathml = mathdata + p.number = pagenumber + end + -- + savetable(statusname, { + pagedata = pagedata, + namedata = namedata, + }) + end) +end + +function svgmath.register(index,page,specification) + if specification then + pagedata[page] = specification + end + namedata[index] = page +end + +function svgmath.convert(filename,svgstyle) + if not filename then + -- no filename given + return false, "no filename" + elseif not isfile(filename) then + -- invalid filename + return false, "invalid filename" + elseif not pdftosvg then + return false, "mudraw is not installed" + end + + os.execute(f_make_tex(filename,svgstyle)) + + local data = loadtable(statusname) + if not data then + -- invalid tex run + return false, "invalid tex run" + elseif not next(data) then + return false, "no converson needed" + end + + local pagedata = data.pagedata + local namedata = data.namedata + + os.execute(f_make_svg(pdfname)) + + local root = xmlload(filename) + local index = 0 + local done = { } + local unique = 0 + + local between = (1-P("<"))^1/"" + local strip = Cs(( + (P("Analyzers run per script and/or language and are needed in order to +process features right.
+--ldx]]-- + +-- never use these numbers directly + +local s_init = 1 local s_rphf = 7 +local s_medi = 2 local s_half = 8 +local s_fina = 3 local s_pref = 9 +local s_isol = 4 local s_blwf = 10 +local s_mark = 5 local s_pstf = 11 +local s_rest = 6 + +local states = { + init = s_init, + medi = s_medi, + fina = s_fina, + isol = s_isol, + mark = s_mark, + rest = s_rest, + rphf = s_rphf, + half = s_half, + pref = s_pref, + blwf = s_blwf, + pstf = s_pstf, +} + +local features = { + init = s_init, + medi = s_medi, + fina = s_fina, + isol = s_isol, + -- mark = s_mark, + -- rest = s_rest, + rphf = s_rphf, + half = s_half, + pref = s_pref, + blwf = s_blwf, + pstf = s_pstf, +} + +analyzers.states = states +analyzers.features = features + +-- todo: analyzers per script/lang, cross font, so we need an font id hash -> script +-- e.g. latin -> hyphenate, arab -> 1/2/3 analyze -- its own namespace + +function analyzers.setstate(head,font) + local useunicodemarks = analyzers.useunicodemarks + local tfmdata = fontdata[font] + local descriptions = tfmdata.descriptions + local first, last, current, n, done = nil, nil, head, 0, false -- maybe make n boolean + current = tonut(current) + while current do + local id = getid(current) + if id == glyph_code and getfont(current) == font then + done = true + local char = getchar(current) + local d = descriptions[char] + if d then + if d.class == "mark" or (useunicodemarks and categories[char] == "mn") then + done = true + setprop(current,a_state,s_mark) + elseif n == 0 then + first, last, n = current, current, 1 + setprop(current,a_state,s_init) + else + last, n = current, n+1 + setprop(current,a_state,s_medi) + end + else -- finish + if first and first == last then + setprop(last,a_state,s_isol) + elseif last then + setprop(last,a_state,s_fina) + end + first, last, n = nil, nil, 0 + end + elseif id == disc_code then + -- always in the middle + setprop(current,a_state,s_medi) + last = current + else -- finish + if first and first == last then 
+ setprop(last,a_state,s_isol) + elseif last then + setprop(last,a_state,s_fina) + end + first, last, n = nil, nil, 0 + if id == math_code then + current = end_of_math(current) + end + end + current = getnext(current) + end + if first and first == last then + setprop(last,a_state,s_isol) + elseif last then + setprop(last,a_state,s_fina) + end + return head, done +end + +-- in the future we will use language/script attributes instead of the +-- font related value, but then we also need dynamic features which is +-- somewhat slower; and .. we need a chain of them + +local function analyzeinitializer(tfmdata,value) -- attr + local script, language = otf.scriptandlanguage(tfmdata) -- attr + local action = initializers[script] + if not action then + -- skip + elseif type(action) == "function" then + return action(tfmdata,value) + else + local action = action[language] + if action then + return action(tfmdata,value) + end + end +end + +local function analyzeprocessor(head,font,attr) + local tfmdata = fontdata[font] + local script, language = otf.scriptandlanguage(tfmdata,attr) + local action = methods[script] + if not action then + -- skip + elseif type(action) == "function" then + return action(head,font,attr) + else + action = action[language] + if action then + return action(head,font,attr) + end + end + return head, false +end + +registerotffeature { + name = "analyze", + description = "analysis of character classes", + default = true, + initializers = { + node = analyzeinitializer, + }, + processors = { + position = 1, + node = analyzeprocessor, + } +} + +-- latin + +methods.latn = analyzers.setstate + +-- This info eventually can go into char-def and we will have a state +-- table for generic then (unicode recognized all states but in practice +-- only has only +-- +-- isolated : isol +-- final : isol_fina +-- medial : isol_fina_medi_init +-- +-- so in practice, without analyzer it's rather useless info which is +-- why having it in char-def makes only sense for 
special purposes (like) +-- like tracing cq. visualizing. + +local tatweel = 0x0640 +local zwnj = 0x200C +local zwj = 0x200D + +local isolated = { -- isol + [0x0600] = true, [0x0601] = true, [0x0602] = true, [0x0603] = true, + [0x0604] = true, + [0x0608] = true, [0x060B] = true, [0x0621] = true, [0x0674] = true, + [0x06DD] = true, + -- mandaic + [0x0856] = true, [0x0858] = true, [0x0857] = true, + -- n'ko + [0x07FA] = true, + -- also here: + [zwnj] = true, + -- 7 + [0x08AD] = true, +} + +local final = { -- isol_fina + [0x0622] = true, [0x0623] = true, [0x0624] = true, [0x0625] = true, + [0x0627] = true, [0x0629] = true, [0x062F] = true, [0x0630] = true, + [0x0631] = true, [0x0632] = true, [0x0648] = true, [0x0671] = true, + [0x0672] = true, [0x0673] = true, [0x0675] = true, [0x0676] = true, + [0x0677] = true, [0x0688] = true, [0x0689] = true, [0x068A] = true, + [0x068B] = true, [0x068C] = true, [0x068D] = true, [0x068E] = true, + [0x068F] = true, [0x0690] = true, [0x0691] = true, [0x0692] = true, + [0x0693] = true, [0x0694] = true, [0x0695] = true, [0x0696] = true, + [0x0697] = true, [0x0698] = true, [0x0699] = true, [0x06C0] = true, + [0x06C3] = true, [0x06C4] = true, [0x06C5] = true, [0x06C6] = true, + [0x06C7] = true, [0x06C8] = true, [0x06C9] = true, [0x06CA] = true, + [0x06CB] = true, [0x06CD] = true, [0x06CF] = true, [0x06D2] = true, + [0x06D3] = true, [0x06D5] = true, [0x06EE] = true, [0x06EF] = true, + [0x0759] = true, [0x075A] = true, [0x075B] = true, [0x076B] = true, + [0x076C] = true, [0x0771] = true, [0x0773] = true, [0x0774] = true, + [0x0778] = true, [0x0779] = true, + [0x08AA] = true, [0x08AB] = true, [0x08AC] = true, + [0xFEF5] = true, [0xFEF7] = true, [0xFEF9] = true, [0xFEFB] = true, + -- syriac + [0x0710] = true, [0x0715] = true, [0x0716] = true, [0x0717] = true, + [0x0718] = true, [0x0719] = true, [0x0728] = true, [0x072A] = true, + [0x072C] = true, [0x071E] = true, + [0x072F] = true, [0x074D] = true, + -- mandaic + [0x0840] = true, [0x0849] = 
true, [0x0854] = true, [0x0846] = true, + [0x084F] = true, + -- 7 + [0x08AE] = true, [0x08B1] = true, [0x08B2] = true, +} + +local medial = { -- isol_fina_medi_init + [0x0626] = true, [0x0628] = true, [0x062A] = true, [0x062B] = true, + [0x062C] = true, [0x062D] = true, [0x062E] = true, [0x0633] = true, + [0x0634] = true, [0x0635] = true, [0x0636] = true, [0x0637] = true, + [0x0638] = true, [0x0639] = true, [0x063A] = true, [0x063B] = true, + [0x063C] = true, [0x063D] = true, [0x063E] = true, [0x063F] = true, + [0x0641] = true, [0x0642] = true, [0x0643] = true, + [0x0644] = true, [0x0645] = true, [0x0646] = true, [0x0647] = true, + [0x0649] = true, [0x064A] = true, [0x066E] = true, [0x066F] = true, + [0x0678] = true, [0x0679] = true, [0x067A] = true, [0x067B] = true, + [0x067C] = true, [0x067D] = true, [0x067E] = true, [0x067F] = true, + [0x0680] = true, [0x0681] = true, [0x0682] = true, [0x0683] = true, + [0x0684] = true, [0x0685] = true, [0x0686] = true, [0x0687] = true, + [0x069A] = true, [0x069B] = true, [0x069C] = true, [0x069D] = true, + [0x069E] = true, [0x069F] = true, [0x06A0] = true, [0x06A1] = true, + [0x06A2] = true, [0x06A3] = true, [0x06A4] = true, [0x06A5] = true, + [0x06A6] = true, [0x06A7] = true, [0x06A8] = true, [0x06A9] = true, + [0x06AA] = true, [0x06AB] = true, [0x06AC] = true, [0x06AD] = true, + [0x06AE] = true, [0x06AF] = true, [0x06B0] = true, [0x06B1] = true, + [0x06B2] = true, [0x06B3] = true, [0x06B4] = true, [0x06B5] = true, + [0x06B6] = true, [0x06B7] = true, [0x06B8] = true, [0x06B9] = true, + [0x06BA] = true, [0x06BB] = true, [0x06BC] = true, [0x06BD] = true, + [0x06BE] = true, [0x06BF] = true, [0x06C1] = true, [0x06C2] = true, + [0x06CC] = true, [0x06CE] = true, [0x06D0] = true, [0x06D1] = true, + [0x06FA] = true, [0x06FB] = true, [0x06FC] = true, [0x06FF] = true, + [0x0750] = true, [0x0751] = true, [0x0752] = true, [0x0753] = true, + [0x0754] = true, [0x0755] = true, [0x0756] = true, [0x0757] = true, + [0x0758] = true, [0x075C] = 
true, [0x075D] = true, [0x075E] = true, + [0x075F] = true, [0x0760] = true, [0x0761] = true, [0x0762] = true, + [0x0763] = true, [0x0764] = true, [0x0765] = true, [0x0766] = true, + [0x0767] = true, [0x0768] = true, [0x0769] = true, [0x076A] = true, + [0x076D] = true, [0x076E] = true, [0x076F] = true, [0x0770] = true, + [0x0772] = true, [0x0775] = true, [0x0776] = true, [0x0777] = true, + [0x077A] = true, [0x077B] = true, [0x077C] = true, [0x077D] = true, + [0x077E] = true, [0x077F] = true, + [0x08A0] = true, [0x08A2] = true, [0x08A4] = true, [0x08A5] = true, + [0x08A6] = true, [0x0620] = true, [0x08A8] = true, [0x08A9] = true, + [0x08A7] = true, [0x08A3] = true, + -- syriac + [0x0712] = true, [0x0713] = true, [0x0714] = true, [0x071A] = true, + [0x071B] = true, [0x071C] = true, [0x071D] = true, [0x071F] = true, + [0x0720] = true, [0x0721] = true, [0x0722] = true, [0x0723] = true, + [0x0724] = true, [0x0725] = true, [0x0726] = true, [0x0727] = true, + [0x0729] = true, [0x072B] = true, [0x072D] = true, [0x072E] = true, + [0x074E] = true, [0x074F] = true, + -- mandaic + [0x0841] = true, [0x0842] = true, [0x0843] = true, [0x0844] = true, + [0x0845] = true, [0x0847] = true, [0x0848] = true, [0x0855] = true, + [0x0851] = true, [0x084E] = true, [0x084D] = true, [0x084A] = true, + [0x084B] = true, [0x084C] = true, [0x0850] = true, [0x0852] = true, + [0x0853] = true, + -- n'ko + [0x07D7] = true, [0x07E8] = true, [0x07D9] = true, [0x07EA] = true, + [0x07CA] = true, [0x07DB] = true, [0x07CC] = true, [0x07DD] = true, + [0x07CE] = true, [0x07DF] = true, [0x07D4] = true, [0x07E5] = true, + [0x07E9] = true, [0x07E7] = true, [0x07E3] = true, [0x07E2] = true, + [0x07E0] = true, [0x07E1] = true, [0x07DE] = true, [0x07DC] = true, + [0x07D1] = true, [0x07DA] = true, [0x07D8] = true, [0x07D6] = true, + [0x07D2] = true, [0x07D0] = true, [0x07CF] = true, [0x07CD] = true, + [0x07CB] = true, [0x07D3] = true, [0x07E4] = true, [0x07D5] = true, + [0x07E6] = true, + -- also here: + [tatweel]= 
true, [zwj] = true, + -- 7 + [0x08A1] = true, [0x08AF] = true, [0x08B0] = true, +} + +local arab_warned = { } + +-- todo: gref + +local function warning(current,what) + local char = getchar(current) + if not arab_warned[char] then + log.report("analyze","arab: character %C has no %a class",char,what) + arab_warned[char] = true + end +end + +-- potential optimization: local medial_final = table.merged(medial,final) + +local function finish(first,last) + if last then + if first == last then + local fc = getchar(first) + if medial[fc] or final[fc] then + setprop(first,a_state,s_isol) + else + warning(first,"isol") + setprop(first,a_state,s_error) + end + else + local lc = getchar(last) + if medial[lc] or final[lc] then + -- if laststate == 1 or laststate == 2 or laststate == 4 then + setprop(last,a_state,s_fina) + else + warning(last,"fina") + setprop(last,a_state,s_error) + end + end + first, last = nil, nil + elseif first then + -- first and last are either both set so we never com here + local fc = getchar(first) + if medial[fc] or final[fc] then + setprop(first,a_state,s_isol) + else + warning(first,"isol") + setprop(first,a_state,s_error) + end + first = nil + end + return first, last +end + +function methods.arab(head,font,attr) + local useunicodemarks = analyzers.useunicodemarks + local tfmdata = fontdata[font] + local marks = tfmdata.resources.marks + local first, last, current, done = nil, nil, head, false + current = tonut(current) + while current do + local id = getid(current) + if id == glyph_code and getfont(current) == font and getsubtype(current)<256 and not getprop(current,a_state) then + done = true + local char = getchar(current) + if marks[char] or (useunicodemarks and categories[char] == "mn") then + setprop(current,a_state,s_mark) + elseif isolated[char] then -- can be zwj or zwnj too + first, last = finish(first,last) + setprop(current,a_state,s_isol) + first, last = nil, nil + elseif not first then + if medial[char] then + 
setprop(current,a_state,s_init) + first, last = first or current, current + elseif final[char] then + setprop(current,a_state,s_isol) + first, last = nil, nil + else -- no arab + first, last = finish(first,last) + end + elseif medial[char] then + first, last = first or current, current + setprop(current,a_state,s_medi) + elseif final[char] then + if getprop(last,a_state) ~= s_init then + -- tricky, we need to check what last may be ! + setprop(last,a_state,s_medi) + end + setprop(current,a_state,s_fina) + first, last = nil, nil + elseif char >= 0x0600 and char <= 0x06FF then -- needs checking + setprop(current,a_state,s_rest) + first, last = finish(first,last) + else -- no + first, last = finish(first,last) + end + else + if first or last then + first, last = finish(first,last) + end + if id == math_code then + current = end_of_math(current) + end + end + current = getnext(current) + end + if first or last then + finish(first,last) + end + return head, done +end + +methods.syrc = methods.arab +methods.mand = methods.arab +methods.nko = methods.arab + +directives.register("otf.analyze.useunicodemarks",function(v) + analyzers.useunicodemarks = v +end) diff --git a/tex/generic/context/luatex/luatex-fonts-otn.lua b/tex/generic/context/luatex/luatex-fonts-otn.lua new file mode 100644 index 000000000..dd3aa6153 --- /dev/null +++ b/tex/generic/context/luatex/luatex-fonts-otn.lua @@ -0,0 +1,2893 @@ +if not modules then modules = { } end modules ['font-otn'] = { + version = 1.001, + comment = "companion to font-ini.mkiv", + author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", + copyright = "PRAGMA ADE / ConTeXt Development Team", + license = "see context related readme files", +} + +-- todo: looks like we have a leak somewhere (probably in ligatures) +-- todo: copy attributes to disc + +-- this is a context version which can contain experimental code, but when we +-- have serious patches we also need to change the other two font-otn files + +-- preprocessors = { "nodes" } + +-- 
anchor class : mark, mkmk, curs, mklg (todo) +-- anchor type : mark, basechar, baselig, basemark, centry, cexit, max (todo) + +-- this is still somewhat preliminary and it will get better in due time; +-- much functionality could only be implemented thanks to the husayni font +-- of Idris Samawi Hamid to who we dedicate this module. + +-- in retrospect it always looks easy but believe it or not, it took a lot +-- of work to get proper open type support done: buggy fonts, fuzzy specs, +-- special made testfonts, many skype sessions between taco, idris and me, +-- torture tests etc etc ... unfortunately the code does not show how much +-- time it took ... + +-- todo: +-- +-- extension infrastructure (for usage out of context) +-- sorting features according to vendors/renderers +-- alternative loop quitters +-- check cursive and r2l +-- find out where ignore-mark-classes went +-- default features (per language, script) +-- handle positions (we need example fonts) +-- handle gpos_single (we might want an extra width field in glyph nodes because adding kerns might interfere) +-- mark (to mark) code is still not what it should be (too messy but we need some more extreem husayni tests) +-- remove some optimizations (when I have a faster machine) +-- +-- maybe redo the lot some way (more context specific) + +--[[ldx-- +This module is a bit more split up that I'd like but since we also want to test
+with plain
The specification of OpenType is kind of vague. Apart from the lack of a proper +free specification, there is also the problem that Microsoft and Adobe +may have their own interpretation of how and in what order to apply features. +In general the Microsoft website has more detailed specifications and is a +better reference. There is also some information in the FontForge help files.
+ +Because so much is possible, fonts might contain bugs and/or be made to +work with certain renderers. These may evolve over time which may have the side +effect that suddenly fonts behave differently.
+ +After a lot of experiments (mostly by Taco, me and Idris) we're now at yet another
+implementation. Of course all errors are mine and of course the code can be
+improved. There are quite some optimizations going on here and processing speed
+is currently acceptable. Not all functions are implemented yet, often because I
+lack the fonts for testing. Many scripts are not yet supported either, but I will
+look into them as soon as
Because there are different interpretations possible, I will extend the code +with more (configurable) variants. I can also add hooks for users so that they can +write their own extensions.
+ +Glyphs are indexed not by unicode but in their own way. This is because there is no
+relationship with unicode at all, apart from the fact that a font might cover certain
+ranges of characters. One character can have multiple shapes. However, at the
+
The raw table as it comes from
This module is sparsely documented because it is a moving target. The table format +of the reader changes and we experiment a lot with different methods for supporting +features.
+ +As with the
Incrementing the version number will force a re-cache. We jump the number by one
+when there's a fix in the
We get hits on a mark, but we're not sure if it has to be applied so +we need to explicitly test for basechar, baselig and basemark entries.
--ldx]]--

-- Mark to base positioning: attach the mark at 'start' to the closest
-- preceding glyph of the current font that is not itself a mark.
--
-- The 'basechar' anchors of that glyph are matched against 'markanchors',
-- limited to the anchors registered for 'lookupname'; the first match is
-- handed over to setmark.
--
-- Returns head, start and a boolean that is only true when setmark was
-- called. Each bail out is reported when trace_bugs is set.

function handlers.gpos_mark2base(head,start,kind,lookupname,markanchors,sequence)
    local markchar = getchar(start)
    if not marks[markchar] then
        if trace_bugs then
            logwarning("%s: mark %s is no mark",pref(kind,lookupname),gref(markchar))
        end
        return head, start, false
    end
    local base = getprev(start) -- [glyph] [start=mark]
    if not (base and getid(base) == glyph_code and getfont(base) == currentfont and getsubtype(base)<256) then
        if trace_bugs then
            logwarning("%s: prev node is no char",pref(kind,lookupname))
        end
        return head, start, false
    end
    local basechar = getchar(base)
    -- step back over a run of marks until we hit a real base glyph
    while marks[basechar] do
        base = getprev(base)
        if base and getid(base) == glyph_code and getfont(base) == currentfont and getsubtype(base)<256 then
            basechar = getchar(base)
        else
            if trace_bugs then
                logwarning("%s: no base for mark %s",pref(kind,lookupname),gref(markchar))
            end
            return head, start, false
        end
    end
    local description = descriptions[basechar]
    local allanchors = description and description.anchors
    if not allanchors then
        if trace_bugs then
         -- logwarning("%s: char %s is missing in font",pref(kind,lookupname),gref(basechar))
            onetimemessage(currentfont,basechar,"no base anchors",report_fonts)
        end
        return head, start, false
    end
    local candidates = allanchors['basechar']
    if candidates then
        local al = anchorlookups[lookupname]
        for anchor, ba in next, candidates do
            -- only anchors that belong to this lookup and that the mark provides too
            local ma = al[anchor] and markanchors[anchor]
            if ma then
                local dx, dy, bound = setmark(start,base,tfmdata.parameters.factor,rlmode,ba,ma,characters[basechar])
                if trace_marks then
                    logprocess("%s, anchor %s, bound %s: anchoring mark %s to basechar %s => (%p,%p)",
                        pref(kind,lookupname),anchor,bound,gref(markchar),gref(basechar),dx,dy)
                end
                return head, start, true
            end
        end
        if trace_bugs then
            logwarning("%s, no matching anchors for mark %s and base %s",pref(kind,lookupname),gref(markchar),gref(basechar))
        end
    end
    return head, start, false
end

-- Mark to ligature positioning: attach the mark at 'start' to a preceding
-- glyph using that glyph's 'baselig' anchors. Each anchor entry is indexed
-- with getligaindex(start) before it is passed to setmark.
--
-- Returns head, start and a boolean that is only true when setmark was
-- called.

function handlers.gpos_mark2ligature(head,start,kind,lookupname,markanchors,sequence)
    -- check chainpos variant
    local markchar = getchar(start)
    if marks[markchar] then
        local base = getprev(start) -- [glyph] [optional marks] [start=mark]
        if base and getid(base) == glyph_code and getfont(base) == currentfont and getsubtype(base)<256 then
            local basechar = getchar(base)
            if marks[basechar] then
                -- walk back over marks until a non mark glyph of this font is found
                while true do
                    base = getprev(base)
                    if base and getid(base) == glyph_code and getfont(base) == currentfont and getsubtype(base)<256 then
                        basechar = getchar(base)
                        if not marks[basechar] then
                            break
                        end
                    else
                        if trace_bugs then
                            logwarning("%s: no base for mark %s",pref(kind,lookupname),gref(markchar))
                        end
                        return head, start, false
                    end
                end
            end
            local index = getligaindex(start)
            local baseanchors = descriptions[basechar]
            if baseanchors then
                baseanchors = baseanchors.anchors
                if baseanchors then
                   local baseanchors = baseanchors['baselig']
                   if baseanchors then
                        local al = anchorlookups[lookupname]
                        for anchor, ba in next, baseanchors do
                            if al[anchor] then
                                local ma = markanchors[anchor]
                                if ma then
                                    -- pick the entry for the index we got from the mark
                                    ba = ba[index]
                                    if ba then
                                        local dx, dy, bound = setmark(start,base,tfmdata.parameters.factor,rlmode,ba,ma,characters[basechar]) -- index
                                        if trace_marks then
                                            logprocess("%s, anchor %s, index %s, bound %s: anchoring mark %s to baselig %s at index %s => (%p,%p)",
                                                pref(kind,lookupname),anchor,index,bound,gref(markchar),gref(basechar),index,dx,dy)
                                        end
                                        return head, start, true
                                    else
                                        if trace_bugs then
                                            logwarning("%s: no matching anchors for mark %s and baselig %s with index %a",pref(kind,lookupname),gref(markchar),gref(basechar),index)
                                        end
                                    end
                                end
                            end
                        end
                        if trace_bugs then
                            logwarning("%s: no matching anchors for mark %s and baselig %s",pref(kind,lookupname),gref(markchar),gref(basechar))
                        end
                    end
                end
            elseif trace_bugs then
            --  logwarning("%s: char %s is missing in font",pref(kind,lookupname),gref(basechar))
                onetimemessage(currentfont,basechar,"no base anchors",report_fonts)
            end
        elseif trace_bugs then
            logwarning("%s: prev node is no char",pref(kind,lookupname))
        end
    elseif trace_bugs then
        logwarning("%s: mark %s is no mark",pref(kind,lookupname),gref(markchar))
    end
    return head, start, false
end

-- Mark to mark positioning: attach the mark at 'start' to a preceding glyph
-- using that glyph's 'basemark' anchors.
--
-- When getligaindex(start) returns a value, preceding nodes that report a
-- different index are skipped first.
--
-- Returns head, start and a boolean that is only true when setmark was
-- called.

function handlers.gpos_mark2mark(head,start,kind,lookupname,markanchors,sequence)
    local markchar = getchar(start)
    if marks[markchar] then
        local base = getprev(start) -- [glyph] [basemark] [start=mark]
        local slc = getligaindex(start)
        if slc then -- a rather messy loop ... needs checking with husayni
            while base do
                local blc = getligaindex(base)
                if blc and blc ~= slc then
                    base = getprev(base)
                else
                    break
                end
            end
        end
        if base and getid(base) == glyph_code and getfont(base) == currentfont and getsubtype(base)<256 then -- subtype test can go
            local basechar = getchar(base)
            local baseanchors = descriptions[basechar]
            if baseanchors then
                baseanchors = baseanchors.anchors
                if baseanchors then
                    baseanchors = baseanchors['basemark']
                    if baseanchors then
                        local al = anchorlookups[lookupname]
                        for anchor,ba in next, baseanchors do
                            if al[anchor] then
                                local ma = markanchors[anchor]
                                if ma then
                                    local dx, dy, bound = setmark(start,base,tfmdata.parameters.factor,rlmode,ba,ma,characters[basechar])
                                    if trace_marks then
                                        logprocess("%s, anchor %s, bound %s: anchoring mark %s to basemark %s => (%p,%p)",
                                            pref(kind,lookupname),anchor,bound,gref(markchar),gref(basechar),dx,dy)
                                    end
                                    return head, start, true
                                end
                            end
                        end
                        if trace_bugs then
                            logwarning("%s: no matching anchors for mark %s and basemark %s",pref(kind,lookupname),gref(markchar),gref(basechar))
                        end
                    end
                end
            elseif trace_bugs then
            --  logwarning("%s: char %s is missing in font",pref(kind,lookupname),gref(basechar))
                onetimemessage(currentfont,basechar,"no base anchors",report_fonts)
            end
        elseif trace_bugs then
            logwarning("%s: prev node is no mark",pref(kind,lookupname))
        end
    elseif trace_bugs then
        logwarning("%s: mark %s is no mark",pref(kind,lookupname),gref(markchar))
    end
    return head, start, false
end

-- Cursive attachment: connect 'start' to the first following non mark glyph
-- of the current font by matching the 'exitanchors' of start with the
-- 'centry' anchors of that glyph; the pair is handed over to setcursive.
--
-- Nothing is done when cursonce is set and start already carries the
-- a_cursbase property, or when start itself is a mark.
--
-- Returns head, start and a boolean that is only true when setcursive was
-- called.

function handlers.gpos_cursive(head,start,kind,lookupname,exitanchors,sequence) -- to be checked
    local alreadydone = cursonce and getprop(start,a_cursbase)
    if not alreadydone then
        local done = false
        local startchar = getchar(start)
        if marks[startchar] then
            if trace_cursive then
                logprocess("%s: ignoring cursive for mark %s",pref(kind,lookupname),gref(startchar))
            end
        else
            local nxt = getnext(start)
            while not done and nxt and getid(nxt) == glyph_code and getfont(nxt) == currentfont and getsubtype(nxt)<256 do
                local nextchar = getchar(nxt)
                if marks[nextchar] then
                    -- should not happen (maybe warning)
                    nxt = getnext(nxt)
                else
                    local entryanchors = descriptions[nextchar]
                    if entryanchors then
                        entryanchors = entryanchors.anchors
                        if entryanchors then
                            entryanchors = entryanchors['centry']
                            if entryanchors then
                                local al = anchorlookups[lookupname]
                                for anchor, entry in next, entryanchors do
                                    if al[anchor] then
                                        local exit = exitanchors[anchor]
                                        if exit then
                                            local dx, dy, bound = setcursive(start,nxt,tfmdata.parameters.factor,rlmode,exit,entry,characters[startchar],characters[nextchar])
                                            if trace_cursive then
                                                logprocess("%s: moving %s to %s cursive (%p,%p) using anchor %s and bound %s in rlmode %s",pref(kind,lookupname),gref(startchar),gref(nextchar),dx,dy,anchor,bound,rlmode)
                                            end
                                            done = true
                                            break
                                        end
                                    end
                                end
                            end
                        end
                    elseif trace_bugs then
                    --  logwarning("%s: char %s is missing in font",pref(kind,lookupname),gref(startchar))
                        onetimemessage(currentfont,startchar,"no entry anchors",report_fonts)
                    end
                    -- only the first non mark glyph is considered
                    break
                end
            end
        end
        return head, start, done
    else
        if trace_cursive and trace_details then
            logprocess("%s, cursive %s is already done",pref(kind,lookupname),gref(getchar(start)),alreadydone)
        end
        return head, start, false
    end
end

-- Single positioning: pass the 'kerns' of this lookup for the glyph at
-- 'start' to setpair. The third return value is always false here.

function handlers.gpos_single(head,start,kind,lookupname,kerns,sequence)
    local startchar = getchar(start)
    local dx, dy, w, h = setpair(start,tfmdata.parameters.factor,rlmode,sequence.flags[4],kerns,characters[startchar])
    if trace_kerns then
        logprocess("%s: shifting single %s by (%p,%p) and correction (%p,%p)",pref(kind,lookupname),gref(startchar),dx,dy,w,h)
    end
    return head, start, false
end

-- Pair positioning: look at the glyphs of the current font that follow
-- 'start'. Marks that have no entry in 'kerns' are stepped over; the loop
-- stops at the first other glyph, whether or not it has an entry.
--
-- A table entry is applied with setpair (first and/or second glyph) when the
-- lookup type is "pair"; a non zero number is applied with setkern.
--
-- Returns head, start and a boolean that is true when an entry was handled.

function handlers.gpos_pair(head,start,kind,lookupname,kerns,sequence)
    -- todo: kerns in disc nodes: pre, post, replace -> loop over disc too
    -- todo: kerns in components of ligatures
    local snext = getnext(start)
    if not snext then
        return head, start, false
    else
        local prev, done = start, false
        local factor = tfmdata.parameters.factor
        local lookuptype = lookuptypes[lookupname]
        while snext and getid(snext) == glyph_code and getfont(snext) == currentfont and getsubtype(snext)<256 do
            local nextchar = getchar(snext)
            local krn = kerns[nextchar]
            if not krn and marks[nextchar] then
                prev = snext
                snext = getnext(snext)
            else
                if not krn then
                    -- skip
                elseif type(krn) == "table" then
                    if lookuptype == "pair" then -- probably not needed
                        local a, b = krn[2], krn[3]
                        if a and #a > 0 then
                            local startchar = getchar(start)
                            local x, y, w, h = setpair(start,factor,rlmode,sequence.flags[4],a,characters[startchar])
                            if trace_kerns then
                                logprocess("%s: shifting first of pair %s and %s by (%p,%p) and correction (%p,%p)",pref(kind,lookupname),gref(startchar),gref(nextchar),x,y,w,h)
                            end
                        end
                        if b and #b > 0 then
                            local startchar = getchar(start)
                            local x, y, w, h = setpair(snext,factor,rlmode,sequence.flags[4],b,characters[nextchar])
                            if trace_kerns then
                                logprocess("%s: shifting second of pair %s and %s by (%p,%p) and correction (%p,%p)",pref(kind,lookupname),gref(startchar),gref(nextchar),x,y,w,h)
                            end
                        end
                    else -- wrong ... position has different entries
                        report_process("%s: check this out (old kern stuff)",pref(kind,lookupname))
                        -- local a, b = krn[2], krn[6]
                        -- if a and a ~= 0 then
                        --     local k = setkern(snext,factor,rlmode,a)
                        --     if trace_kerns then
                        --         logprocess("%s: inserting first kern %s between %s and %s",pref(kind,lookupname),k,gref(getchar(prev)),gref(nextchar))
                        --     end
                        -- end
                        -- if b and b ~= 0 then
                        --     logwarning("%s: ignoring second kern xoff %s",pref(kind,lookupname),b*factor)
                        -- end
                    end
                    done = true
                elseif krn ~= 0 then
                    local k = setkern(snext,factor,rlmode,krn)
                    if trace_kerns then
                        logprocess("%s: inserting kern %s between %s and %s",pref(kind,lookupname),k,gref(getchar(prev)),gref(nextchar))
                    end
                    done = true
                end
                break
            end
        end
        return head, start, done
    end
end

--[[ldx--
I will implement multiple chain replacements once I run into a font that uses
it. It's not that complex to handle.
--ldx]]--

-- Dispatch tables for the contextual (chain) handlers defined from here on.

local chainmores = { }
local chainprocs = { }

-- Loggers used by the chain handlers: messages go to report_chain and are
-- also passed to registermessage when trace_steps is set.

local function logprocess(...)
    if trace_steps then
        registermessage(...)
    end
    report_chain(...)
end

local logwarning = report_chain

-- We could share functions but that would lead to extra function calls with many
-- arguments, redundant tests and confusing messages.

-- Placeholder: only reports that it was called and signals that nothing was done.

function chainprocs.chainsub(head,start,stop,kind,chainname,currentcontext,lookuphash,lookuplist,chainlookupname)
    logwarning("%s: a direct call to chainsub cannot happen",cref(kind,chainname,chainlookupname))
    return head, start, false
end

-- Placeholder: only reports that it was called and signals that nothing was done.

function chainmores.chainsub(head,start,stop,kind,chainname,currentcontext,lookuphash,lookuplist,chainlookupname,n)
    logprocess("%s: a direct call to chainsub cannot happen",cref(kind,chainname,chainlookupname))
    return head, start, false
end

-- The reversesub is a special case, which is why we need to store the replacements
-- in a bit weird way. There is no lookup and the replacement comes from the lookup
-- itself. It is meant mostly for dealing with Urdu.
--
-- When 'replacements' has an entry for the character at 'start' the glyph gets
-- that character (after resetinjection) and the third return value is true,
-- otherwise it is false.

function chainprocs.reversesub(head,start,stop,kind,chainname,currentcontext,lookuphash,replacements)
    local char = getchar(start)
    local replacement = replacements[char]
    if replacement then
        if trace_singles then
            logprocess("%s: single reverse replacement of %s by %s",cref(kind,chainname),gref(char),gref(replacement))
        end
        resetinjection(start)
        setfield(start,"char",replacement)
        return head, start, true
    else
        return head, start, false
    end
end

--[[ldx--
This chain stuff is somewhat tricky since we can have a sequence of actions to be
applied: single, alternate, multiple or ligature where ligature can be an invalid
one in the sense that it will replace multiple by one but not necessarily one that
looks like the combination (i.e.
it is the counterpart of multiple then). For +example, the following is valid:
+ +Therefore we don't really do the replacement here already unless we have the +single lookup case. The efficiency of the replacements can be improved by deleting +as little as needed but that would also make the code even more messy.
+--ldx]]-- + +-- local function delete_till_stop(head,start,stop,ignoremarks) -- keeps start +-- local n = 1 +-- if start == stop then +-- -- done +-- elseif ignoremarks then +-- repeat -- start x x m x x stop => start m +-- local next = getnext(start) +-- if not marks[getchar(next)] then +-- local components = getfield(next,"components") +-- if components then -- probably not needed +-- flush_node_list(components) +-- end +-- head = delete_node(head,next) +-- end +-- n = n + 1 +-- until next == stop +-- else -- start x x x stop => start +-- repeat +-- local next = getnext(start) +-- local components = getfield(next,"components") +-- if components then -- probably not needed +-- flush_node_list(components) +-- end +-- head = delete_node(head,next) +-- n = n + 1 +-- until next == stop +-- end +-- return head, n +-- end + +--[[ldx-- +Here we replace start by a single variant, First we delete the rest of the +match.
--ldx]]--

-- Chained single substitution. The first glyph node found between 'start'
-- and 'stop' gets the replacement that the first subtable of 'currentlookup'
-- has for its character.
--
-- Returns head, start and true as soon as a glyph node was found (also when
-- there was no usable replacement for it); head, start and false when the
-- range holds no glyph node.

function chainprocs.gsub_single(head,start,stop,kind,chainname,currentcontext,lookuphash,currentlookup,chainlookupname,chainindex)
    -- todo: marks ?
    local subtables = currentlookup.subtables
    if #subtables > 1 then
        logwarning("todo: check if we need to loop over the replacements: %s",concat(subtables," "))
    end
    -- locate the first glyph, but never look beyond stop
    local current = start
    while current and getid(current) ~= glyph_code do
        if current == stop then
            return head, start, false
        end
        current = getnext(current)
    end
    if not current then
        return head, start, false
    end
    local currentchar = getchar(current)
    local lookupname = subtables[1] -- only 1
    local singles = lookuphash[lookupname]
    if singles then
        local replacement = singles[currentchar]
        if replacement and replacement ~= "" then
            if trace_singles then
                logprocess("%s: replacing single %s by %s",cref(kind,chainname,chainlookupname,lookupname,chainindex),gref(currentchar),gref(replacement))
            end
            resetinjection(current)
            setfield(current,"char",replacement)
        elseif trace_bugs then
            logwarning("%s: no single for %s",cref(kind,chainname,chainlookupname,lookupname,chainindex),gref(currentchar))
        end
    elseif trace_bugs then
        logwarning("%s: no single hits",cref(kind,chainname,chainlookupname,lookupname,chainindex))
    end
    return head, start, true
end

chainmores.gsub_single = chainprocs.gsub_single

--[[ldx--
Here we replace start by a sequence of new glyphs. First we delete the rest of
the match.
+--ldx]]--
+
+-- Chain handler for multiple substitutions. Looks up the character of start in
+-- the hash of the first subtable and, when a non-empty entry is found, hands
+-- over to multiple_glyphs (whose results are returned as-is). In all other
+-- cases head, start and false are returned.
+
+function chainprocs.gsub_multiple(head,start,stop,kind,chainname,currentcontext,lookuphash,currentlookup,chainlookupname)
+ -- local head, n = delete_till_stop(head,start,stop)
+    local startchar = getchar(start)
+    local subtables = currentlookup.subtables
+    local lookupname = subtables[1]
+    local replacements = lookuphash[lookupname]
+    if not replacements then
+        if trace_bugs then
+            logwarning("%s: no multiple hits",cref(kind,chainname,chainlookupname,lookupname))
+        end
+    else
+        replacements = replacements[startchar]
+        -- the empty-string test must look at 'replacements'; the former
+        -- 'replacement' was an undeclared (global) name, so it never matched
+        if not replacements or replacements == "" then
+            if trace_bugs then
+                logwarning("%s: no multiple for %s",cref(kind,chainname,chainlookupname,lookupname),gref(startchar))
+            end
+        else
+            if trace_multiples then
+                logprocess("%s: replacing %s by multiple characters %s",cref(kind,chainname,chainlookupname,lookupname),gref(startchar),gref(replacements))
+            end
+            return multiple_glyphs(head,start,replacements,currentlookup.flags[1])
+        end
+    end
+    return head, start, false
+end
+
+chainmores.gsub_multiple = chainprocs.gsub_multiple
+
+--[[ldx--
+Here we replace start by new glyph. First we delete the rest of the match.
+--ldx]]--
+
+-- char_1 mark_1 -> char_x mark_1 (ignore marks)
+-- char_1 mark_1 -> char_x
+
+-- to be checked: do we always have just one glyph?
+-- we can also have alternates for marks
+-- marks come last anyway
+-- are there cases where we need to delete the mark
+
+-- Chain handler for alternate substitutions. Walks from start towards stop and
+-- handles the first glyph node it meets: get_alternative_glyph picks a choice
+-- from the alternatives registered for that glyph's character (driven by the
+-- feature value) and, when there is one, the glyph's "char" field is set to it.
+-- Returns head, start and true as soon as a glyph node was seen, and head,
+-- start, false when no glyph was found.
+
+function chainprocs.gsub_alternate(head,start,stop,kind,chainname,currentcontext,lookuphash,currentlookup,chainlookupname)
+    local current = start
+    local subtables = currentlookup.subtables
+    local value = featurevalue == true and tfmdata.shared.features[kind] or featurevalue
+    while current do
+        if getid(current) == glyph_code then -- is this check needed?
+            local currentchar = getchar(current)
+            local lookupname = subtables[1]
+            local alternatives = lookuphash[lookupname]
+            if not alternatives then
+                if trace_bugs then
+                    logwarning("%s: no alternative hit",cref(kind,chainname,chainlookupname,lookupname))
+                end
+            else
+                alternatives = alternatives[currentchar]
+                if alternatives then
+                    local choice, comment = get_alternative_glyph(current,alternatives,value,trace_alternatives)
+                    if choice then
+                        if trace_alternatives then
+                            -- report the character we actually looked up (was an undeclared 'char')
+                            logprocess("%s: replacing %s by alternative %a to %s, %s",cref(kind,chainname,chainlookupname,lookupname),gref(currentchar),choice,gref(choice),comment)
+                        end
+                        -- the choice was derived from 'current', so that is the node that
+                        -- gets replaced (same as gsub_single; identical when current == start)
+                        resetinjection(current)
+                        setfield(current,"char",choice)
+                    else
+                        if trace_alternatives then
+                            logwarning("%s: no variant %a for %s, %s",cref(kind,chainname,chainlookupname,lookupname),value,gref(currentchar),comment)
+                        end
+                    end
+                elseif trace_bugs then
+                    -- no comment is available here: it is only known once an alternative was looked up
+                    logwarning("%s: no alternative for %s",cref(kind,chainname,chainlookupname,lookupname),gref(currentchar))
+                end
+            end
+            return head, start, true
+        elseif current == stop then
+            break
+        else
+            current = getnext(current)
+        end
+    end
+    return head, start, false
+end
+
+chainmores.gsub_alternate = chainprocs.gsub_alternate
+
+--[[ldx--
+When we replace ligatures we use a helper that handles the marks. 
I might change +this function (move code inline and handle the marks by a separate function). We +assume rather stupid ligatures (no complex disc nodes).
+--ldx]]--
+
+-- Chain handler for ligatures. Starting at the node after start it follows the
+-- ligature tree of the first subtable, stepping over disc nodes and (when
+-- flags[1] is set) over marks, until stop is reached or no further component
+-- matches. When the reached tree node has a 'ligature' entry, toligature is
+-- called and head, start, true and the number of matched components are
+-- returned; otherwise head, start, false, 0.
+
+function chainprocs.gsub_ligature(head,start,stop,kind,chainname,currentcontext,lookuphash,currentlookup,chainlookupname,chainindex)
+    local startchar = getchar(start)
+    local subtables = currentlookup.subtables
+    local lookupname = subtables[1]
+    local ligatures = lookuphash[lookupname]
+    if not ligatures then
+        if trace_bugs then
+            logwarning("%s: no ligature hits",cref(kind,chainname,chainlookupname,lookupname,chainindex))
+        end
+    else
+        ligatures = ligatures[startchar]
+        if not ligatures then
+            if trace_bugs then
+                logwarning("%s: no ligatures starting with %s",cref(kind,chainname,chainlookupname,lookupname,chainindex),gref(startchar))
+            end
+        else
+            local s = getnext(start)
+            local discfound = false
+            local last = stop
+            local nofreplacements = 0
+            local skipmark = currentlookup.flags[1]
+            while s do
+                local id = getid(s)
+                if id == disc_code then
+                    s = getnext(s)
+                    discfound = true
+                else
+                    local schar = getchar(s)
+                    if skipmark and marks[schar] then -- marks
+                        s = getnext(s)
+                    else
+                        local lg = ligatures[schar]
+                        if lg then
+                            ligatures, last, nofreplacements = lg, s, nofreplacements + 1
+                            if s == stop then
+                                break
+                            else
+                                s = getnext(s)
+                            end
+                        else
+                            break
+                        end
+                    end
+                end
+            end
+            local l2 = ligatures.ligature
+            if l2 then
+                if chainindex then
+                    -- called as one of several chained lookups: end at the last matched component
+                    stop = last
+                end
+                if trace_ligatures then
+                    if start == stop then
+                        logprocess("%s: replacing character %s by ligature %s case 3",cref(kind,chainname,chainlookupname,lookupname,chainindex),gref(startchar),gref(l2))
+                    else
+                        logprocess("%s: replacing character %s upto %s by ligature %s case 4",cref(kind,chainname,chainlookupname,lookupname,chainindex),gref(startchar),gref(getchar(stop)),gref(l2))
+                    end
+                end
+                head, start = toligature(kind,lookupname,head,start,stop,l2,currentlookup.flags[1],discfound)
+                return head, start, true, nofreplacements
+            elseif trace_bugs then
+                if start == stop then
+                    logwarning("%s: replacing character %s by ligature fails",cref(kind,chainname,chainlookupname,lookupname,chainindex),gref(startchar))
+                else
+                    logwarning("%s: replacing character %s upto %s by ligature fails",cref(kind,chainname,chainlookupname,lookupname,chainindex),gref(startchar),gref(getchar(stop)))
+                end
+            end
+        end
+    end
+    return head, start, false, 0
+end
+
+chainmores.gsub_ligature = chainprocs.gsub_ligature
+
+-- Chain handler that anchors the mark at start to a preceding base glyph. The
+-- base is the nearest previous glyph of the current font that is not itself a
+-- mark. For the first anchor that is valid for this lookup and known to both
+-- the base ('basechar' anchors) and the mark, setmark is called and head,
+-- start, true is returned; in all other cases head, start, false.
+
+function chainprocs.gpos_mark2base(head,start,stop,kind,chainname,currentcontext,lookuphash,currentlookup,chainlookupname)
+    local markchar = getchar(start)
+    if marks[markchar] then
+        local subtables = currentlookup.subtables
+        local lookupname = subtables[1]
+        local markanchors = lookuphash[lookupname]
+        if markanchors then
+            markanchors = markanchors[markchar]
+        end
+        if markanchors then
+            local base = getprev(start) -- [glyph] [start=mark]
+            if base and getid(base) == glyph_code and getfont(base) == currentfont and getsubtype(base)<256 then
+                local basechar = getchar(base)
+                if marks[basechar] then
+                    -- step back over preceding marks until a real base shows up
+                    while true do
+                        base = getprev(base)
+                        if base and getid(base) == glyph_code and getfont(base) == currentfont and getsubtype(base)<256 then
+                            basechar = getchar(base)
+                            if not marks[basechar] then
+                                break
+                            end
+                        else
+                            if trace_bugs then
+                                logwarning("%s: no base for mark %s",pref(kind,lookupname),gref(markchar))
+                            end
+                            return head, start, false
+                        end
+                    end
+                end
+                local baseanchors = descriptions[basechar].anchors
+                if baseanchors then
+                    local baseanchors = baseanchors['basechar']
+                    if baseanchors then
+                        local al = anchorlookups[lookupname]
+                        for anchor,ba in next, baseanchors do
+                            if al[anchor] then
+                                local ma = markanchors[anchor]
+                                if ma then
+                                    local dx, dy, bound = setmark(start,base,tfmdata.parameters.factor,rlmode,ba,ma,characters[basechar])
+                                    if trace_marks then
+                                        logprocess("%s, anchor %s, bound %s: anchoring mark %s to basechar %s => (%p,%p)",
+                                            cref(kind,chainname,chainlookupname,lookupname),anchor,bound,gref(markchar),gref(basechar),dx,dy)
+                                    end
+                                    return head, start, true
+                                end
+                            end
+                        end
+                        if trace_bugs then
+                            logwarning("%s, no matching anchors for mark %s and base %s",cref(kind,chainname,chainlookupname,lookupname),gref(markchar),gref(basechar))
+                        end
+                    end
+                end
+            elseif trace_bugs then
+                logwarning("%s: prev node is no char",cref(kind,chainname,chainlookupname,lookupname))
+            end
+        elseif trace_bugs then
+            logwarning("%s: mark %s has no anchors",cref(kind,chainname,chainlookupname,lookupname),gref(markchar))
+        end
+    elseif trace_bugs then
+        logwarning("%s: mark %s is no mark",cref(kind,chainname,chainlookupname),gref(markchar))
+    end
+    return head, start, false
+end
+
+-- Chain handler that anchors the mark at start to a preceding ligature. Works
+-- like gpos_mark2base but uses the 'baselig' anchors of the base, indexed by
+-- the ligature index that getligaindex reports for the mark.
+
+function chainprocs.gpos_mark2ligature(head,start,stop,kind,chainname,currentcontext,lookuphash,currentlookup,chainlookupname)
+    local markchar = getchar(start)
+    if marks[markchar] then
+        local subtables = currentlookup.subtables
+        local lookupname = subtables[1]
+        local markanchors = lookuphash[lookupname]
+        if markanchors then
+            markanchors = markanchors[markchar]
+        end
+        if markanchors then
+            local base = getprev(start) -- [glyph] [optional marks] [start=mark]
+            if base and getid(base) == glyph_code and getfont(base) == currentfont and getsubtype(base)<256 then
+                local basechar = getchar(base)
+                if marks[basechar] then
+                    -- step back over preceding marks until a real base shows up
+                    while true do
+                        base = getprev(base)
+                        if base and getid(base) == glyph_code and getfont(base) == currentfont and getsubtype(base)<256 then
+                            basechar = getchar(base)
+                            if not marks[basechar] then
+                                break
+                            end
+                        else
+                            if trace_bugs then
+                                -- format the mark with gref, like every other message here
+                                logwarning("%s: no base for mark %s",cref(kind,chainname,chainlookupname,lookupname),gref(markchar))
+                            end
+                            return head, start, false
+                        end
+                    end
+                end
+                -- todo: like marks a ligatures hash
+                local index = getligaindex(start)
+                local baseanchors = descriptions[basechar].anchors
+                if baseanchors then
+                    local baseanchors = baseanchors['baselig']
+                    if baseanchors then
+                        local al = anchorlookups[lookupname]
+                        for anchor,ba in next, baseanchors do
+                            if al[anchor] then
+                                local ma = markanchors[anchor]
+                                if ma then
+                                    ba = ba[index]
+                                    if ba then
+                                        local dx, dy, bound = setmark(start,base,tfmdata.parameters.factor,rlmode,ba,ma,characters[basechar])
+                                        if trace_marks then
+                                            -- report 'bound' itself (was 'a or bound' with an undeclared 'a')
+                                            logprocess("%s, anchor %s, bound %s: anchoring mark %s to baselig %s at index %s => (%p,%p)",
+                                                cref(kind,chainname,chainlookupname,lookupname),anchor,bound,gref(markchar),gref(basechar),index,dx,dy)
+                                        end
+                                        return head, start, true
+                                    end
+                                end
+                            end
+                        end
+                        if trace_bugs then
+                            logwarning("%s: no matching anchors for mark %s and baselig %s",cref(kind,chainname,chainlookupname,lookupname),gref(markchar),gref(basechar))
+                        end
+                    end
+                end
+            elseif trace_bugs then
+                logwarning("feature %s, lookup %s: prev node is no char",kind,lookupname)
+            end
+        elseif trace_bugs then
+            logwarning("%s: mark %s has no anchors",cref(kind,chainname,chainlookupname,lookupname),gref(markchar))
+        end
+    elseif trace_bugs then
+        logwarning("%s: mark %s is no mark",cref(kind,chainname,chainlookupname),gref(markchar))
+    end
+    return head, start, false
+end
+
+-- Chain handler that anchors the mark at start to a preceding mark. When the
+-- mark has a ligature index, previous nodes that carry a different index are
+-- stepped over first. The 'basemark' anchors of the node found that way are
+-- matched against the anchors of the mark; on the first match setmark is
+-- called and head, start, true is returned, otherwise head, start, false.
+
+function chainprocs.gpos_mark2mark(head,start,stop,kind,chainname,currentcontext,lookuphash,currentlookup,chainlookupname)
+    local markchar = getchar(start)
+    if marks[markchar] then
+     -- local markanchors = descriptions[markchar].anchors markanchors = markanchors and markanchors.mark
+        local subtables = currentlookup.subtables
+        local lookupname = subtables[1]
+        local markanchors = lookuphash[lookupname]
+        if markanchors then
+            markanchors = markanchors[markchar]
+        end
+        if markanchors then
+            local base = getprev(start) -- [glyph] [basemark] [start=mark]
+            local slc = getligaindex(start)
+            if slc then -- a rather messy loop ... needs checking with husayni
+                while base do
+                    local blc = getligaindex(base)
+                    if blc and blc ~= slc then
+                        base = getprev(base)
+                    else
+                        break
+                    end
+                end
+            end
+            if base and getid(base) == glyph_code and getfont(base) == currentfont and getsubtype(base)<256 then -- subtype test can go
+                local basechar = getchar(base)
+                local baseanchors = descriptions[basechar].anchors
+                if baseanchors then
+                    baseanchors = baseanchors['basemark']
+                    if baseanchors then
+                        local al = anchorlookups[lookupname]
+                        for anchor,ba in next, baseanchors do
+                            if al[anchor] then
+                                local ma = markanchors[anchor]
+                                if ma then
+                                    local dx, dy, bound = setmark(start,base,tfmdata.parameters.factor,rlmode,ba,ma,characters[basechar])
+                                    if trace_marks then
+                                        logprocess("%s, anchor %s, bound %s: anchoring mark %s to basemark %s => (%p,%p)",
+                                            cref(kind,chainname,chainlookupname,lookupname),anchor,bound,gref(markchar),gref(basechar),dx,dy)
+                                    end
+                                    return head, start, true
+                                end
+                            end
+                        end
+                        if trace_bugs then
+                            -- the message prefix comes from cref (was gref, which formats characters)
+                            logwarning("%s: no matching anchors for mark %s and basemark %s",cref(kind,chainname,chainlookupname,lookupname),gref(markchar),gref(basechar))
+                        end
+                    end
+                end
+            elseif trace_bugs then
+                logwarning("%s: prev node is no mark",cref(kind,chainname,chainlookupname,lookupname))
+            end
+        elseif trace_bugs then
+            logwarning("%s: mark %s has no anchors",cref(kind,chainname,chainlookupname,lookupname),gref(markchar))
+        end
+    elseif trace_bugs then
+        logwarning("%s: mark %s is no mark",cref(kind,chainname,chainlookupname),gref(markchar))
+    end
+    return head, start, false
+end
+
+-- Chain handler for cursive attachment. Unless start was already handled
+-- (cursonce plus the a_cursbase property), the exit anchors of start are
+-- matched against the 'centry' anchors of the next non-mark glyph of the
+-- current font; on the first match setcursive is called. Returns head, start
+-- and whether an attachment was made.
+
+function chainprocs.gpos_cursive(head,start,stop,kind,chainname,currentcontext,lookuphash,currentlookup,chainlookupname)
+    local alreadydone = cursonce and getprop(start,a_cursbase)
+    if not alreadydone then
+        local startchar = getchar(start)
+        local subtables = currentlookup.subtables
+        local lookupname = subtables[1]
+        local exitanchors = lookuphash[lookupname]
+        if exitanchors then
+            exitanchors = exitanchors[startchar]
+        end
+        if exitanchors then
+            local done = false
+            if marks[startchar] then
+                if trace_cursive then
+                    logprocess("%s: ignoring cursive for mark %s",pref(kind,lookupname),gref(startchar))
+                end
+            else
+                local nxt = getnext(start)
+                while not done and nxt and getid(nxt) == glyph_code and getfont(nxt) == currentfont and getsubtype(nxt)<256 do
+                    local nextchar = getchar(nxt)
+                    if marks[nextchar] then
+                        -- should not happen (maybe warning)
+                        nxt = getnext(nxt)
+                    else
+                        local entryanchors = descriptions[nextchar]
+                        if entryanchors then
+                            entryanchors = entryanchors.anchors
+                            if entryanchors then
+                                entryanchors = entryanchors['centry']
+                                if entryanchors then
+                                    local al = anchorlookups[lookupname]
+                                    for anchor, entry in next, entryanchors do
+                                        if al[anchor] then
+                                            local exit = exitanchors[anchor]
+                                            if exit then
+                                                local dx, dy, bound = setcursive(start,nxt,tfmdata.parameters.factor,rlmode,exit,entry,characters[startchar],characters[nextchar])
+                                                if trace_cursive then
+                                                    logprocess("%s: moving %s to %s cursive (%p,%p) using anchor %s and bound %s in rlmode %s",pref(kind,lookupname),gref(startchar),gref(nextchar),dx,dy,anchor,bound,rlmode)
+                                                end
+                                                done = true
+                                                break
+                                            end
+                                        end
+                                    end
+                                end
+                            end
+                        elseif trace_bugs then
+                         -- logwarning("%s: char %s is missing in font",pref(kind,lookupname),gref(startchar))
+                            onetimemessage(currentfont,startchar,"no entry anchors",report_fonts)
+                        end
+                        break
+                    end
+                end
+            end
+            return head, start, done
+        else
+            -- NOTE(review): this branch is taken when there are no exit anchors, while
+            -- 'alreadydone' is always false here; the message looks misplaced - confirm
+            if trace_cursive and trace_details then
+                logprocess("%s, cursive %s is already done",pref(kind,lookupname),gref(getchar(start)),alreadydone)
+            end
+            return head, start, false
+        end
+    end
+    return head, start, false
+end
+
+-- Chain handler for single positioning: when the first subtable has an entry
+-- for the character of start, setpair is applied to start. Always returns
+-- head, start, false.
+
+function chainprocs.gpos_single(head,start,stop,kind,chainname,currentcontext,lookuphash,currentlookup,chainlookupname,chainindex,sequence)
+    -- untested .. needs checking for the new model
+    local startchar = getchar(start)
+    local subtables = currentlookup.subtables
+    local lookupname = subtables[1]
+    local kerns = lookuphash[lookupname]
+    if kerns then
+        kerns = kerns[startchar] -- needed ?
+        if kerns then
+            local dx, dy, w, h = setpair(start,tfmdata.parameters.factor,rlmode,sequence.flags[4],kerns,characters[startchar])
+            if trace_kerns then
+                logprocess("%s: shifting single %s by (%p,%p) and correction (%p,%p)",cref(kind,chainname,chainlookupname),gref(startchar),dx,dy,w,h)
+            end
+        end
+    end
+    return head, start, false
+end
+
+chainmores.gpos_single = chainprocs.gpos_single -- okay?
+
+-- when machines become faster i will make a shared function
+
+-- Chain handler for pair positioning. Looks at the glyphs of the current font
+-- that follow start, stepping over marks that have no kern entry, and handles
+-- the first other glyph: a table entry is applied with setpair (lookup type
+-- "pair") or setkern (old style kerns), a non-zero number with setkern.
+-- Returns head, start and whether something was applied.
+
+function chainprocs.gpos_pair(head,start,stop,kind,chainname,currentcontext,lookuphash,currentlookup,chainlookupname,chainindex,sequence)
+    local snext = getnext(start)
+    if snext then
+        local startchar = getchar(start)
+        local subtables = currentlookup.subtables
+        local lookupname = subtables[1]
+        local kerns = lookuphash[lookupname]
+        if kerns then
+            kerns = kerns[startchar]
+            if kerns then
+                local lookuptype = lookuptypes[lookupname]
+                local prev, done = start, false
+                local factor = tfmdata.parameters.factor
+                while snext and getid(snext) == glyph_code and getfont(snext) == currentfont and getsubtype(snext)<256 do
+                    local nextchar = getchar(snext)
+                    local krn = kerns[nextchar]
+                    if not krn and marks[nextchar] then
+                        prev = snext
+                        snext = getnext(snext)
+                    else
+                        if not krn then
+                            -- skip
+                        elseif type(krn) == "table" then
+                            if lookuptype == "pair" then
+                                local a, b = krn[2], krn[3]
+                                if a and #a > 0 then
+                                    local startchar = getchar(start)
+                                    local x, y, w, h = setpair(start,factor,rlmode,sequence.flags[4],a,characters[startchar])
+                                    if trace_kerns then
+                                        logprocess("%s: shifting first of pair %s and %s by (%p,%p) and correction (%p,%p)",cref(kind,chainname,chainlookupname),gref(startchar),gref(nextchar),x,y,w,h)
+                                    end
+                                end
+                                if b and #b > 0 then
+                                    local startchar = getchar(start)
+                                    local x, y, w, h = setpair(snext,factor,rlmode,sequence.flags[4],b,characters[nextchar])
+                                    if trace_kerns then
+                                        logprocess("%s: shifting second of pair %s and %s by (%p,%p) and correction (%p,%p)",cref(kind,chainname,chainlookupname),gref(startchar),gref(nextchar),x,y,w,h)
+                                    end
+                                end
+                            else
+                                report_process("%s: check this out (old kern stuff)",cref(kind,chainname,chainlookupname))
+                                local a, b = krn[2], krn[6]
+                                if a and a ~= 0 then
+                                    local k = setkern(snext,factor,rlmode,a)
+                                    if trace_kerns then
+                                        logprocess("%s: inserting first kern %s between %s and %s",cref(kind,chainname,chainlookupname),k,gref(getchar(prev)),gref(nextchar))
+                                    end
+                                end
+                                if b and b ~= 0 then
+                                    logwarning("%s: ignoring second kern xoff %s",cref(kind,chainname,chainlookupname),b*factor)
+                                end
+                            end
+                            done = true
+                        elseif krn ~= 0 then
+                            local k = setkern(snext,factor,rlmode,krn)
+                            if trace_kerns then
+                                logprocess("%s: inserting kern %s between %s and %s",cref(kind,chainname,chainlookupname),k,gref(getchar(prev)),gref(nextchar))
+                            end
+                            done = true
+                        end
+                        break
+                    end
+                end
+                return head, start, done
+            end
+        end
+    end
+    return head, start, false
+end
+
+chainmores.gpos_pair = chainprocs.gpos_pair -- okay?
+
+-- what pointer to return, spec says stop
+-- to be discussed ... is bidi changer a space?
+-- elseif char == zwnj and sequence[n][32] then -- brrr + +-- somehow l or f is global +-- we don't need to pass the currentcontext, saves a bit +-- make a slow variant then can be activated but with more tracing + +local function show_skip(kind,chainname,char,ck,class) + if ck[9] then + logwarning("%s: skipping char %s, class %a, rule %a, lookuptype %a, %a => %a",cref(kind,chainname),gref(char),class,ck[1],ck[2],ck[9],ck[10]) + else + logwarning("%s: skipping char %s, class %a, rule %a, lookuptype %a",cref(kind,chainname),gref(char),class,ck[1],ck[2]) + end +end + +local quit_on_no_replacement = true + +directives.register("otf.chain.quitonnoreplacement",function(value) -- maybe per font + quit_on_no_replacement = value +end) + +local function normal_handle_contextchain(head,start,kind,chainname,contexts,sequence,lookuphash) + -- local rule, lookuptype, sequence, f, l, lookups = ck[1], ck[2] ,ck[3], ck[4], ck[5], ck[6] + local flags = sequence.flags + local done = false + local skipmark = flags[1] + local skipligature = flags[2] + local skipbase = flags[3] + local someskip = skipmark or skipligature or skipbase -- could be stored in flags for a fast test (hm, flags could be false !) 
+ local markclass = sequence.markclass -- todo, first we need a proper test + local skipped = false + for k=1,#contexts do + local match = true + local current = start + local last = start + local ck = contexts[k] + local seq = ck[3] + local s = #seq + -- f..l = mid string + if s == 1 then + -- never happens + match = getid(current) == glyph_code and getfont(current) == currentfont and getsubtype(current)<256 and seq[1][getchar(current)] + else + -- maybe we need a better space check (maybe check for glue or category or combination) + -- we cannot optimize for n=2 because there can be disc nodes + local f, l = ck[4], ck[5] + -- current match + if f == 1 and f == l then -- current only + -- already a hit + -- match = true + else -- before/current/after | before/current | current/after + -- no need to test first hit (to be optimized) + if f == l then -- new, else last out of sync (f is > 1) + -- match = true + else + local n = f + 1 + last = getnext(last) + while n <= l do + if last then + local id = getid(last) + if id == glyph_code then + if getfont(last) == currentfont and getsubtype(last)<256 then + local char = getchar(last) + local ccd = descriptions[char] + if ccd then + local class = ccd.class + if class == skipmark or class == skipligature or class == skipbase or (markclass and class == "mark" and not markclass[char]) then + skipped = true + if trace_skips then + show_skip(kind,chainname,char,ck,class) + end + last = getnext(last) + elseif seq[n][char] then + if n < l then + last = getnext(last) + end + n = n + 1 + else + match = false + break + end + else + match = false + break + end + else + match = false + break + end + elseif id == disc_code then + last = getnext(last) + else + match = false + break + end + else + match = false + break + end + end + end + end + -- before + if match and f > 1 then + local prev = getprev(start) + if prev then + local n = f-1 + while n >= 1 do + if prev then + local id = getid(prev) + if id == glyph_code then + if 
getfont(prev) == currentfont and getsubtype(prev)<256 then -- normal char + local char = getchar(prev) + local ccd = descriptions[char] + if ccd then + local class = ccd.class + if class == skipmark or class == skipligature or class == skipbase or (markclass and class == "mark" and not markclass[char]) then + skipped = true + if trace_skips then + show_skip(kind,chainname,char,ck,class) + end + elseif seq[n][char] then + n = n -1 + else + match = false + break + end + else + match = false + break + end + else + match = false + break + end + elseif id == disc_code then + -- skip 'm + elseif seq[n][32] then + n = n -1 + else + match = false + break + end + prev = getprev(prev) + elseif seq[n][32] then -- somewhat special, as zapfino can have many preceding spaces + n = n -1 + else + match = false + break + end + end + else + match = false + end + end + -- after + if match and s > l then + local current = last and getnext(last) + if current then + -- removed optimization for s-l == 1, we have to deal with marks anyway + local n = l + 1 + while n <= s do + if current then + local id = getid(current) + if id == glyph_code then + if getfont(current) == currentfont and getsubtype(current)<256 then -- normal char + local char = getchar(current) + local ccd = descriptions[char] + if ccd then + local class = ccd.class + if class == skipmark or class == skipligature or class == skipbase or (markclass and class == "mark" and not markclass[char]) then + skipped = true + if trace_skips then + show_skip(kind,chainname,char,ck,class) + end + elseif seq[n][char] then + n = n + 1 + else + match = false + break + end + else + match = false + break + end + else + match = false + break + end + elseif id == disc_code then + -- skip 'm + elseif seq[n][32] then -- brrr + n = n + 1 + else + match = false + break + end + current = getnext(current) + elseif seq[n][32] then + n = n + 1 + else + match = false + break + end + end + else + match = false + end + end + end + if match then + -- ck 
== currentcontext + if trace_contexts then + local rule, lookuptype, f, l = ck[1], ck[2], ck[4], ck[5] + local char = getchar(start) + if ck[9] then + logwarning("%s: rule %s matches at char %s for (%s,%s,%s) chars, lookuptype %a, %a => %a", + cref(kind,chainname),rule,gref(char),f-1,l-f+1,s-l,lookuptype,ck[9],ck[10]) + else + logwarning("%s: rule %s matches at char %s for (%s,%s,%s) chars, lookuptype %a", + cref(kind,chainname),rule,gref(char),f-1,l-f+1,s-l,lookuptype) + end + end + local chainlookups = ck[6] + if chainlookups then + local nofchainlookups = #chainlookups + -- we can speed this up if needed + if nofchainlookups == 1 then + local chainlookupname = chainlookups[1] + local chainlookup = lookuptable[chainlookupname] + if chainlookup then + local cp = chainprocs[chainlookup.type] + if cp then + local ok + head, start, ok = cp(head,start,last,kind,chainname,ck,lookuphash,chainlookup,chainlookupname,nil,sequence) + if ok then + done = true + end + else + logprocess("%s: %s is not yet supported",cref(kind,chainname,chainlookupname),chainlookup.type) + end + else -- shouldn't happen + logprocess("%s is not yet supported",cref(kind,chainname,chainlookupname)) + end + else + local i = 1 + while true do + if skipped then + while true do + local char = getchar(start) + local ccd = descriptions[char] + if ccd then + local class = ccd.class + if class == skipmark or class == skipligature or class == skipbase or (markclass and class == "mark" and not markclass[char]) then + start = getnext(start) + else + break + end + else + break + end + end + end + local chainlookupname = chainlookups[i] + local chainlookup = lookuptable[chainlookupname] + if not chainlookup then + -- okay, n matches, < n replacements + i = i + 1 + else + local cp = chainmores[chainlookup.type] + if not cp then + -- actually an error + logprocess("%s: %s is not yet supported",cref(kind,chainname,chainlookupname),chainlookup.type) + i = i + 1 + else + local ok, n + head, start, ok, n = 
cp(head,start,last,kind,chainname,ck,lookuphash,chainlookup,chainlookupname,i,sequence) + -- messy since last can be changed ! + if ok then + done = true + -- skip next one(s) if ligature + i = i + (n or 1) + else + i = i + 1 + end + end + end + if i > nofchainlookups then + break + elseif start then + start = getnext(start) + else + -- weird + end + end + end + else + local replacements = ck[7] + if replacements then + head, start, done = chainprocs.reversesub(head,start,last,kind,chainname,ck,lookuphash,replacements) -- sequence + else + done = quit_on_no_replacement -- can be meant to be skipped / quite inconsistent in fonts + if trace_contexts then + logprocess("%s: skipping match",cref(kind,chainname)) + end + end + end + end + end + return head, start, done +end + +-- Because we want to keep this elsewhere (an because speed is less an issue) we +-- pass the font id so that the verbose variant can access the relevant helper tables. + +local verbose_handle_contextchain = function(font,...) + logwarning("no verbose handler installed, reverting to 'normal'") + otf.setcontextchain() + return normal_handle_contextchain(...) +end + +otf.chainhandlers = { + normal = normal_handle_contextchain, + verbose = verbose_handle_contextchain, +} + +function otf.setcontextchain(method) + if not method or method == "normal" or not otf.chainhandlers[method] then + if handlers.contextchain then -- no need for a message while making the format + logwarning("installing normal contextchain handler") + end + handlers.contextchain = normal_handle_contextchain + else + logwarning("installing contextchain handler %a",method) + local handler = otf.chainhandlers[method] + handlers.contextchain = function(...) + return handler(currentfont,...) -- hm, get rid of ... 
+ end + end + handlers.gsub_context = handlers.contextchain + handlers.gsub_contextchain = handlers.contextchain + handlers.gsub_reversecontextchain = handlers.contextchain + handlers.gpos_contextchain = handlers.contextchain + handlers.gpos_context = handlers.contextchain +end + +otf.setcontextchain() + +local missing = { } -- we only report once + +local function logprocess(...) + if trace_steps then + registermessage(...) + end + report_process(...) +end + +local logwarning = report_process + +local function report_missing_cache(typ,lookup) + local f = missing[currentfont] if not f then f = { } missing[currentfont] = f end + local t = f[typ] if not t then t = { } f[typ] = t end + if not t[lookup] then + t[lookup] = true + logwarning("missing cache for lookup %a, type %a, font %a, name %a",lookup,typ,currentfont,tfmdata.properties.fullname) + end +end + +local resolved = { } -- we only resolve a font,script,language pair once + +-- todo: pass all these 'locals' in a table + +local lookuphashes = { } + +setmetatableindex(lookuphashes, function(t,font) + local lookuphash = fontdata[font].resources.lookuphash + if not lookuphash or not next(lookuphash) then + lookuphash = false + end + t[font] = lookuphash + return lookuphash +end) + +-- fonts.hashes.lookups = lookuphashes + +local autofeatures = fonts.analyzers.features -- was: constants + +local function initialize(sequence,script,language,enabled) + local features = sequence.features + if features then + local order = sequence.order + if order then + for i=1,#order do -- + local kind = order[i] -- + local valid = enabled[kind] + if valid then + local scripts = features[kind] -- + local languages = scripts[script] or scripts[wildcard] + if languages and (languages[language] or languages[wildcard]) then + return { valid, autofeatures[kind] or false, sequence.chain or 0, kind, sequence } + end + end + end + else + -- can't happen + end + end + return false +end + +function otf.dataset(tfmdata,font) -- generic 
variant, overloaded in context + local shared = tfmdata.shared + local properties = tfmdata.properties + local language = properties.language or "dflt" + local script = properties.script or "dflt" + local enabled = shared.features + local res = resolved[font] + if not res then + res = { } + resolved[font] = res + end + local rs = res[script] + if not rs then + rs = { } + res[script] = rs + end + local rl = rs[language] + if not rl then + rl = { + -- indexed but we can also add specific data by key + } + rs[language] = rl + local sequences = tfmdata.resources.sequences + for s=1,#sequences do + local v = enabled and initialize(sequences[s],script,language,enabled) + if v then + rl[#rl+1] = v + end + end + end + return rl +end + +-- elseif id == glue_code then +-- if p[5] then -- chain +-- local pc = pp[32] +-- if pc then +-- start, ok = start, false -- p[1](start,kind,p[2],pc,p[3],p[4]) +-- if ok then +-- done = true +-- end +-- if start then start = getnext(start) end +-- else +-- start = getnext(start) +-- end +-- else +-- start = getnext(start) +-- end + +-- there will be a new direction parser (pre-parsed etc) + +-- less bytecode: 290 -> 254 +-- +-- attr = attr or false +-- +-- local a = getattr(start,0) +-- if (a == attr and (not attribute or getprop(start,a_state) == attribute)) or (not attribute or getprop(start,a_state) == attribute) then +-- -- the action +-- end + +local function featuresprocessor(head,font,attr) + + local lookuphash = lookuphashes[font] -- we can also check sequences here + + if not lookuphash then + return head, false + end + + head = tonut(head) + + if trace_steps then + checkstep(head) + end + + tfmdata = fontdata[font] + descriptions = tfmdata.descriptions + characters = tfmdata.characters + resources = tfmdata.resources + + marks = resources.marks + anchorlookups = resources.lookup_to_anchor + lookuptable = resources.lookups + lookuptypes = resources.lookuptypes + lookuptags = resources.lookuptags + + currentfont = font + rlmode = 0 
+ + local sequences = resources.sequences + local done = false + local datasets = otf.dataset(tfmdata,font,attr) + + local dirstack = { } -- could move outside function + + -- We could work on sub start-stop ranges instead but I wonder if there is that + -- much speed gain (experiments showed that it made not much sense) and we need + -- to keep track of directions anyway. Also at some point I want to play with + -- font interactions and then we do need the full sweeps. + + -- Keeping track of the headnode is needed for devanagari (I generalized it a bit + -- so that multiple cases are also covered.) + + -- todo: retain prev + + for s=1,#datasets do + local dataset = datasets[s] + featurevalue = dataset[1] -- todo: pass to function instead of using a global + + local sequence = dataset[5] -- sequences[s] -- also dataset[5] + local rlparmode = 0 + local topstack = 0 + local success = false + local attribute = dataset[2] + local chain = dataset[3] -- sequence.chain or 0 + local typ = sequence.type + local subtables = sequence.subtables + if chain < 0 then + -- this is a limited case, no special treatments like 'init' etc + local handler = handlers[typ] + -- we need to get rid of this slide! 
probably no longer needed in latest luatex + local start = find_node_tail(head) -- slow (we can store tail because there's always a skip at the end): todo + while start do + local id = getid(start) + if id == glyph_code then + if getfont(start) == font and getsubtype(start) < 256 then + local a = getattr(start,0) + if a then + a = a == attr + else + a = true + end + if a then + for i=1,#subtables do + local lookupname = subtables[i] + local lookupcache = lookuphash[lookupname] + if lookupcache then + local lookupmatch = lookupcache[getchar(start)] + if lookupmatch then + head, start, success = handler(head,start,dataset[4],lookupname,lookupmatch,sequence,lookuphash,i) + if success then + break + end + end + else + report_missing_cache(typ,lookupname) + end + end + if start then start = getprev(start) end + else + start = getprev(start) + end + else + start = getprev(start) + end + else + start = getprev(start) + end + end + else + local handler = handlers[typ] + local ns = #subtables + local start = head -- local ? + rlmode = 0 -- to be checked ? 
+ if ns == 1 then -- happens often + local lookupname = subtables[1] + local lookupcache = lookuphash[lookupname] + if not lookupcache then -- also check for empty cache + report_missing_cache(typ,lookupname) + else + + local function subrun(start) + -- mostly for gsub, gpos would demand a more clever approach + local head = start + local done = false + while start do + local id = getid(start) + if id == glyph_code and getfont(start) == font and getsubtype(start) < 256 then + local a = getattr(start,0) + if a then + a = (a == attr) and (not attribute or getprop(start,a_state) == attribute) + else + a = not attribute or getprop(start,a_state) == attribute + end + if a then + local lookupmatch = lookupcache[getchar(start)] + if lookupmatch then + -- sequence kan weg + local ok + head, start, ok = handler(head,start,dataset[4],lookupname,lookupmatch,sequence,lookuphash,1) + if ok then + done = true + end + end + if start then start = getnext(start) end + else + start = getnext(start) + end + else + start = getnext(start) + end + end + if done then + success = true + return head + end + end + + local function kerndisc(disc) -- we can assume that prev and next are glyphs + local prev = getprev(disc) + local next = getnext(disc) + if prev and next then + setfield(prev,"next",next) + -- setfield(next,"prev",prev) + local a = getattr(prev,0) + if a then + a = (a == attr) and (not attribute or getprop(prev,a_state) == attribute) + else + a = not attribute or getprop(prev,a_state) == attribute + end + if a then + local lookupmatch = lookupcache[getchar(prev)] + if lookupmatch then + -- sequence kan weg + local h, d, ok = handler(head,prev,dataset[4],lookupname,lookupmatch,sequence,lookuphash,1) + if ok then + done = true + success = true + end + end + end + setfield(prev,"next",disc) + -- setfield(next,"prev",disc) + end + return next + end + + while start do + local id = getid(start) + if id == glyph_code then + if getfont(start) == font and getsubtype(start) < 256 then + 
local a = getattr(start,0) + if a then + a = (a == attr) and (not attribute or getprop(start,a_state) == attribute) + else + a = not attribute or getprop(start,a_state) == attribute + end + if a then + local lookupmatch = lookupcache[getchar(start)] + if lookupmatch then + -- sequence kan weg + local ok + head, start, ok = handler(head,start,dataset[4],lookupname,lookupmatch,sequence,lookuphash,1) + if ok then + success = true + end + end + if start then start = getnext(start) end + else + start = getnext(start) + end + else + start = getnext(start) + end + elseif id == disc_code then + -- mostly for gsub + if getsubtype(start) == discretionary_code then + local pre = getfield(start,"pre") + if pre then + local new = subrun(pre) + if new then setfield(start,"pre",new) end + end + local post = getfield(start,"post") + if post then + local new = subrun(post) + if new then setfield(start,"post",new) end + end + local replace = getfield(start,"replace") + if replace then + local new = subrun(replace) + if new then setfield(start,"replace",new) end + end +elseif typ == "gpos_single" or typ == "gpos_pair" then + kerndisc(start) + end + start = getnext(start) + elseif id == whatsit_code then -- will be function + local subtype = getsubtype(start) + if subtype == dir_code then + local dir = getfield(start,"dir") + if dir == "+TRT" or dir == "+TLT" then + topstack = topstack + 1 + dirstack[topstack] = dir + elseif dir == "-TRT" or dir == "-TLT" then + topstack = topstack - 1 + end + local newdir = dirstack[topstack] + if newdir == "+TRT" then + rlmode = -1 + elseif newdir == "+TLT" then + rlmode = 1 + else + rlmode = rlparmode + end + if trace_directions then + report_process("directions after txtdir %a: parmode %a, txtmode %a, # stack %a, new dir %a",dir,rlparmode,rlmode,topstack,newdir) + end + elseif subtype == localpar_code then + local dir = getfield(start,"dir") + if dir == "TRT" then + rlparmode = -1 + elseif dir == "TLT" then + rlparmode = 1 + else + rlparmode = 0 + 
end + -- one might wonder if the par dir should be looked at, so we might as well drop the next line + rlmode = rlparmode + if trace_directions then + report_process("directions after pardir %a: parmode %a, txtmode %a",dir,rlparmode,rlmode) + end + end + start = getnext(start) + elseif id == math_code then + start = getnext(end_of_math(start)) + else + start = getnext(start) + end + end + end + else + + local function subrun(start) + -- mostly for gsub, gpos would demand a more clever approach + local head = start + local done = false + while start do + local id = getid(start) + if id == glyph_code and getfont(start) == font and getsubtype(start) < 256 then + local a = getattr(start,0) + if a then + a = (a == attr) and (not attribute or getprop(start,a_state) == attribute) + else + a = not attribute or getprop(start,a_state) == attribute + end + if a then + for i=1,ns do + local lookupname = subtables[i] + local lookupcache = lookuphash[lookupname] + if lookupcache then + local lookupmatch = lookupcache[getchar(start)] + if lookupmatch then + -- we could move all code inline but that makes things even more unreadable + local ok + head, start, ok = handler(head,start,dataset[4],lookupname,lookupmatch,sequence,lookuphash,i) + if ok then + done = true + break + elseif not start then + -- don't ask why ... 
shouldn't happen + break + end + end + else + report_missing_cache(typ,lookupname) + end + end + if start then start = getnext(start) end + else + start = getnext(start) + end + else + start = getnext(start) + end + end + if done then + success = true + return head + end + end + + local function kerndisc(disc) -- we can assume that prev and next are glyphs + local prev = getprev(disc) + local next = getnext(disc) + if prev and next then + setfield(prev,"next",next) + -- setfield(next,"prev",prev) + local a = getattr(prev,0) + if a then + a = (a == attr) and (not attribute or getprop(prev,a_state) == attribute) + else + a = not attribute or getprop(prev,a_state) == attribute + end + if a then + for i=1,ns do + local lookupname = subtables[i] + local lookupcache = lookuphash[lookupname] + if lookupcache then + local lookupmatch = lookupcache[getchar(prev)] + if lookupmatch then + -- we could move all code inline but that makes things even more unreadable + local h, d, ok = handler(head,prev,dataset[4],lookupname,lookupmatch,sequence,lookuphash,i) + if ok then + done = true + break + end + end + else + report_missing_cache(typ,lookupname) + end + end + end + setfield(prev,"next",disc) + -- setfield(next,"prev",disc) + end + return next + end + + while start do + local id = getid(start) + if id == glyph_code then + if getfont(start) == font and getsubtype(start) < 256 then + local a = getattr(start,0) + if a then + a = (a == attr) and (not attribute or getprop(start,a_state) == attribute) + else + a = not attribute or getprop(start,a_state) == attribute + end + if a then + for i=1,ns do + local lookupname = subtables[i] + local lookupcache = lookuphash[lookupname] + if lookupcache then + local lookupmatch = lookupcache[getchar(start)] + if lookupmatch then + -- we could move all code inline but that makes things even more unreadable + local ok + head, start, ok = handler(head,start,dataset[4],lookupname,lookupmatch,sequence,lookuphash,i) + if ok then + success = true + 
break + elseif not start then + -- don't ask why ... shouldn't happen + break + end + end + else + report_missing_cache(typ,lookupname) + end + end + if start then start = getnext(start) end + else + start = getnext(start) + end + else + start = getnext(start) + end + elseif id == disc_code then + -- mostly for gsub + if getsubtype(start) == discretionary_code then + local pre = getfield(start,"pre") + if pre then + local new = subrun(pre) + if new then setfield(start,"pre",new) end + end + local post = getfield(start,"post") + if post then + local new = subrun(post) + if new then setfield(start,"post",new) end + end + local replace = getfield(start,"replace") + if replace then + local new = subrun(replace) + if new then setfield(start,"replace",new) end + end +elseif typ == "gpos_single" or typ == "gpos_pair" then + kerndisc(start) + end + start = getnext(start) + elseif id == whatsit_code then + local subtype = getsubtype(start) + if subtype == dir_code then + local dir = getfield(start,"dir") + if dir == "+TRT" or dir == "+TLT" then + topstack = topstack + 1 + dirstack[topstack] = dir + elseif dir == "-TRT" or dir == "-TLT" then + topstack = topstack - 1 + end + local newdir = dirstack[topstack] + if newdir == "+TRT" then + rlmode = -1 + elseif newdir == "+TLT" then + rlmode = 1 + else + rlmode = rlparmode + end + if trace_directions then + report_process("directions after txtdir %a: parmode %a, txtmode %a, # stack %a, new dir %a",dir,rlparmode,rlmode,topstack,newdir) + end + elseif subtype == localpar_code then + local dir = getfield(start,"dir") + if dir == "TRT" then + rlparmode = -1 + elseif dir == "TLT" then + rlparmode = 1 + else + rlparmode = 0 + end + rlmode = rlparmode + if trace_directions then + report_process("directions after pardir %a: parmode %a, txtmode %a",dir,rlparmode,rlmode) + end + end + start = getnext(start) + elseif id == math_code then + start = getnext(end_of_math(start)) + else + start = getnext(start) + end + end + end + end + if 
success then + done = true + end + if trace_steps then -- ? + registerstep(head) + end + + end + + head = tonode(head) + + return head, done +end + +local function generic(lookupdata,lookupname,unicode,lookuphash) + local target = lookuphash[lookupname] + if target then + target[unicode] = lookupdata + else + lookuphash[lookupname] = { [unicode] = lookupdata } + end +end + +local action = { + + substitution = generic, + multiple = generic, + alternate = generic, + position = generic, + + ligature = function(lookupdata,lookupname,unicode,lookuphash) + local target = lookuphash[lookupname] + if not target then + target = { } + lookuphash[lookupname] = target + end + for i=1,#lookupdata do + local li = lookupdata[i] + local tu = target[li] + if not tu then + tu = { } + target[li] = tu + end + target = tu + end + target.ligature = unicode + end, + + pair = function(lookupdata,lookupname,unicode,lookuphash) + local target = lookuphash[lookupname] + if not target then + target = { } + lookuphash[lookupname] = target + end + local others = target[unicode] + local paired = lookupdata[1] + if others then + others[paired] = lookupdata + else + others = { [paired] = lookupdata } + target[unicode] = others + end + end, + +} + +local function prepare_lookups(tfmdata) + + local rawdata = tfmdata.shared.rawdata + local resources = rawdata.resources + local lookuphash = resources.lookuphash + local anchor_to_lookup = resources.anchor_to_lookup + local lookup_to_anchor = resources.lookup_to_anchor + local lookuptypes = resources.lookuptypes + local characters = tfmdata.characters + local descriptions = tfmdata.descriptions + + -- we cannot free the entries in the descriptions as sometimes we access + -- then directly (for instance anchors) ... selectively freeing does save + -- much memory as it's only a reference to a table and the slot in the + -- description hash is not freed anyway + + for unicode, character in next, characters do -- we cannot loop over descriptions ! 
+ + local description = descriptions[unicode] + + if description then + + local lookups = description.slookups + if lookups then + for lookupname, lookupdata in next, lookups do + action[lookuptypes[lookupname]](lookupdata,lookupname,unicode,lookuphash) + end + end + + local lookups = description.mlookups + if lookups then + for lookupname, lookuplist in next, lookups do + local lookuptype = lookuptypes[lookupname] + for l=1,#lookuplist do + local lookupdata = lookuplist[l] + action[lookuptype](lookupdata,lookupname,unicode,lookuphash) + end + end + end + + local list = description.kerns + if list then + for lookup, krn in next, list do -- ref to glyph, saves lookup + local target = lookuphash[lookup] + if target then + target[unicode] = krn + else + lookuphash[lookup] = { [unicode] = krn } + end + end + end + + local list = description.anchors + if list then + for typ, anchors in next, list do -- types + if typ == "mark" or typ == "cexit" then -- or entry? + for name, anchor in next, anchors do + local lookups = anchor_to_lookup[name] + if lookups then + for lookup, _ in next, lookups do + local target = lookuphash[lookup] + if target then + target[unicode] = anchors + else + lookuphash[lookup] = { [unicode] = anchors } + end + end + end + end + end + end + end + + end + + end + +end + +local function split(replacement,original) + local result = { } + for i=1,#replacement do + result[original[i]] = replacement[i] + end + return result +end + +local valid = { + coverage = { chainsub = true, chainpos = true, contextsub = true }, + reversecoverage = { reversesub = true }, + glyphs = { chainsub = true, chainpos = true }, +} + +local function prepare_contextchains(tfmdata) + local rawdata = tfmdata.shared.rawdata + local resources = rawdata.resources + local lookuphash = resources.lookuphash + local lookuptags = resources.lookuptags + local lookups = rawdata.lookups + if lookups then + for lookupname, lookupdata in next, rawdata.lookups do + local lookuptype = 
lookupdata.type + if lookuptype then + local rules = lookupdata.rules + if rules then + local format = lookupdata.format + local validformat = valid[format] + if not validformat then + report_prepare("unsupported format %a",format) + elseif not validformat[lookuptype] then + -- todo: dejavu-serif has one (but i need to see what use it has) + report_prepare("unsupported format %a, lookuptype %a, lookupname %a",format,lookuptype,lookuptags[lookupname]) + else + local contexts = lookuphash[lookupname] + if not contexts then + contexts = { } + lookuphash[lookupname] = contexts + end + local t, nt = { }, 0 + for nofrules=1,#rules do + local rule = rules[nofrules] + local current = rule.current + local before = rule.before + local after = rule.after + local replacements = rule.replacements + local sequence = { } + local nofsequences = 0 + -- Eventually we can store start, stop and sequence in the cached file + -- but then less sharing takes place so best not do that without a lot + -- of profiling so let's forget about it. + if before then + for n=1,#before do + nofsequences = nofsequences + 1 + sequence[nofsequences] = before[n] + end + end + local start = nofsequences + 1 + for n=1,#current do + nofsequences = nofsequences + 1 + sequence[nofsequences] = current[n] + end + local stop = nofsequences + if after then + for n=1,#after do + nofsequences = nofsequences + 1 + sequence[nofsequences] = after[n] + end + end + if sequence[1] then + -- Replacements only happen with reverse lookups as they are single only. We + -- could pack them into current (replacement value instead of true) and then + -- use sequence[start] instead but it's somewhat ugly. 
+ nt = nt + 1 + t[nt] = { nofrules, lookuptype, sequence, start, stop, rule.lookups, replacements } + for unic, _ in next, sequence[start] do + local cu = contexts[unic] + if not cu then + contexts[unic] = t + end + end + end + end + end + else + -- no rules + end + else + report_prepare("missing lookuptype for lookupname %a",lookuptags[lookupname]) + end + end + end +end + +-- we can consider lookuphash == false (initialized but empty) vs lookuphash == table + +local function featuresinitializer(tfmdata,value) + if true then -- value then + -- beware we need to use the topmost properties table + local rawdata = tfmdata.shared.rawdata + local properties = rawdata.properties + if not properties.initialized then + local starttime = trace_preparing and os.clock() + local resources = rawdata.resources + resources.lookuphash = resources.lookuphash or { } + prepare_contextchains(tfmdata) + prepare_lookups(tfmdata) + properties.initialized = true + if trace_preparing then + report_prepare("preparation time is %0.3f seconds for %a",os.clock()-starttime,tfmdata.properties.fullname) + end + end + end +end + +registerotffeature { + name = "features", + description = "features", + default = true, + initializers = { + position = 1, + node = featuresinitializer, + }, + processors = { + node = featuresprocessor, + } +} + +-- This can be used for extra handlers, but should be used with care! + +otf.handlers = handlers diff --git a/tex/generic/context/luatex/luatex-fonts.lua b/tex/generic/context/luatex/luatex-fonts.lua index 7995be33e..c81e8cd1a 100644 --- a/tex/generic/context/luatex/luatex-fonts.lua +++ b/tex/generic/context/luatex/luatex-fonts.lua @@ -27,6 +27,17 @@ if not modules then modules = { } end modules ['luatex-fonts'] = { -- also add more helper code here, but that depends to what extend metatex (sidetrack of context) -- evolves into a low level layer (depends on time, as usual). 
+texio.write_nl("") +texio.write_nl("--------------------------------------------------------------------------------") +texio.write_nl("The font code has been brought in sync with the context version of 2014.12.21 so") +texio.write_nl("if things don't work out as expected the interfacing needs to be checked. When") +texio.write_nl("this works as expected a second upgrade will happen that gives a more complete") +texio.write_nl("support and another sync with the context code (that new code is currently being") +texio.write_nl("tested. The base pass is now integrated in the main pass. The results can differ") +texio.write_nl("from those in context because there we integrate some mechanisms differently.") +texio.write_nl("--------------------------------------------------------------------------------") +texio.write_nl("") + utf = utf or unicode.utf8 -- We have some (global) hooks (for latex): @@ -210,12 +221,12 @@ if non_generic_context.luatex_fonts.skip_loading ~= true then loadmodule('font-oti.lua') loadmodule('font-otf.lua') loadmodule('font-otb.lua') - loadmodule('node-inj.lua') -- will be replaced (luatex >= .70) - loadmodule('font-ota.lua') - loadmodule('font-otn.lua') - loadmodule('font-otp.lua') -- optional + loadmodule('luatex-fonts-inj.lua') + loadmodule('luatex-fonts-ota.lua') + loadmodule('luatex-fonts-otn.lua') + loadmodule('font-otp.lua') loadmodule('luatex-fonts-lua.lua') - loadmodule('font-def.lua') + loadmodule('font-def.lua') -- this code (stripped) might end up in luatex-fonts-def.lua loadmodule('luatex-fonts-def.lua') loadmodule('luatex-fonts-ext.lua') -- some extensions diff --git a/tex/generic/context/luatex/luatex-math.tex b/tex/generic/context/luatex/luatex-math.tex index ab304b974..604b4a1f8 100644 --- a/tex/generic/context/luatex/luatex-math.tex +++ b/tex/generic/context/luatex/luatex-math.tex @@ -19,15 +19,6 @@ % a bunch of fonts: -\font\tenrm = file:lmroman10-regular.otf:+liga;+kern;+tlig;+trep at 10pt -\font\sevenrm = 
file:lmroman7-regular.otf:+liga;+kern;+tlig;+trep at 7pt -\font\fiverm = file:lmroman5-regular.otf:+liga;+kern;+tlig;+trep at 5pt - -\font\tentt = file:lmmono10-regular.otf at 10pt -\font\tensl = file:lmromanslant10-regular.otf:+liga;+kern;+tlig;+trep at 10pt -\font\tenit = file:lmroman10-italic.otf:+liga;+kern;+tlig;+trep at 10pt -\font\tenbi = file:lmroman10-bolditalic.otf:+liga;+kern;+tlig;+trep at 10pt - \let \teni = \relax \let \seveni = \relax \let \fivei = \relax @@ -35,19 +26,58 @@ \let \sevensy = \relax \let \fivesy = \relax \let \tenex = \relax -\let \tenbf = \relax \let \sevenbf = \relax \let \fivebf = \relax -\tenrm +\def\latinmodern + {\font\tenrm = file:lmroman10-regular.otf:+liga;+kern;+tlig;+trep at 10pt + \font\sevenrm = file:lmroman7-regular.otf:+liga;+kern;+tlig;+trep at 7pt + \font\fiverm = file:lmroman5-regular.otf:+liga;+kern;+tlig;+trep at 5pt + % + \font\tentt = file:lmmono10-regular.otf at 10pt + \font\tensl = file:lmromanslant10-regular.otf:+liga;+kern;+tlig;+trep at 10pt + \font\tenit = file:lmroman10-italic.otf:+liga;+kern;+tlig;+trep at 10pt + \font\tenbf = file:lmroman10-bold.otf:+liga;+kern;+tlig;+trep at 10pt + \font\tenbi = file:lmroman10-bolditalic.otf:+liga;+kern;+tlig;+trep at 10pt + % + \font\mathfonttextupright = file:latinmodern-math.otf:ssty=0;fixmath=yes at 10pt + \font\mathfontscriptupright = file:latinmodern-math.otf:ssty=1;fixmath=yes at 7pt + \font\mathfontscriptscriptupright = file:latinmodern-math.otf:ssty=2;fixmath=yes at 5pt + % + \textfont 0 = \mathfonttextupright + \scriptfont 0 = \mathfontscriptupright + \scriptscriptfont 0 = \mathfontscriptscriptupright + % + \tenrm} -\font\mathfonttextupright = file:latinmodern-math.otf:ssty=0;fixmath=yes at 10pt -\font\mathfontscriptupright = file:latinmodern-math.otf:ssty=1;fixmath=yes at 7pt -\font\mathfontscriptscriptupright = file:latinmodern-math.otf:ssty=2;fixmath=yes at 5pt +\def\lucidabright + {\font\tenrm = file:lucidabrightot.otf:+liga;+kern;+tlig;+trep at 10pt + 
\font\sevenrm = file:lucidabrightot.otf:+liga;+kern;+tlig;+trep at 7pt + \font\fiverm = file:lucidabrightot.otf:+liga;+kern;+tlig;+trep at 5pt + % + \font\tentt = file:lucidabrightot.otf at 10pt + \font\tenit = file:lucidabrightot.otf:+liga;+kern;+tlig;+trep at 10pt + \font\tenit = file:lucidabrightot-italic.otf:+liga;+kern;+tlig;+trep at 10pt + \font\tenbf = file:lucidabrightot-demi.otf:+liga;+kern;+tlig;+trep at 10pt + \font\tenbi = file:lucidabrightot-demiitalic.otf:+liga;+kern;+tlig;+trep at 10pt + % + \font\mathfonttextupright = file:lucidabrightmathot.otf:ssty=0;fixmath=yes at 10pt + \font\mathfontscriptupright = file:lucidabrightmathot.otf:ssty=1;fixmath=yes at 7pt + \font\mathfontscriptscriptupright = file:lucidabrightmathot.otf:ssty=2;fixmath=yes at 5pt + % + \textfont 0 = \mathfonttextupright + \scriptfont 0 = \mathfontscriptupright + \scriptscriptfont 0 = \mathfontscriptscriptupright + % + \tenrm} -\textfont 0 = \mathfonttextupright -\scriptfont 0 = \mathfontscriptupright -\scriptscriptfont 0 = \mathfontscriptscriptupright +\directlua { + if arguments["mtx:lucidabright"] then + tex.print("\string\\lucidabright") + else + tex.print("\string\\latinmodern") + end +} \newtoks\everymathrm \newtoks\everymathmit @@ -58,12 +88,12 @@ \newtoks\everymathbi \newtoks\everymathtt -\def\rm{\fam0\relax\the\everymathmrm\relax\tenrm\relax} -\def\it{\fam0\relax\the\everymathit \relax\tenit\relax} -\def\sl{\fam0\relax\the\everymathsl \relax\tensl\relax} -\def\bf{\fam0\relax\the\everymathbf \relax\tenbf\relax} -\def\bi{\fam0\relax\the\everymathbi \relax\tenbi\relax} -\def\tt{\fam0\relax\the\everymathtt \relax\tentt\relax} +\def\rm{\fam0\relax\the\everymathrm\relax\tenrm\relax} +\def\it{\fam0\relax\the\everymathit\relax\tenit\relax} +\def\sl{\fam0\relax\the\everymathsl\relax\tensl\relax} +\def\bf{\fam0\relax\the\everymathbf\relax\tenbf\relax} +\def\bi{\fam0\relax\the\everymathbi\relax\tenbi\relax} +\def\tt{\fam0\relax\the\everymathtt\relax\tentt\relax} \let\mit \relax % use 
names or \Uchar or define a vector \let\cal \relax % idem, i'm not in the mood for this now @@ -1799,7 +1829,8 @@ % a few definitions: -\def\sqrt{\Uroot "0 "221A } +\def\sqrt {\Uroot "0 "221A{}} +\def\root#1\of{\Uroot "0 "221A{#1}} % \skewchar\teni='177 \skewchar\seveni='177 \skewchar\fivei='177 % \skewchar\tensy='60 \skewchar\sevensy='60 \skewchar\fivesy='60 diff --git a/tex/generic/context/luatex/luatex-mplib.tex b/tex/generic/context/luatex/luatex-mplib.tex index 8af9f2d8a..09dd179f3 100644 --- a/tex/generic/context/luatex/luatex-mplib.tex +++ b/tex/generic/context/luatex/luatex-mplib.tex @@ -61,6 +61,7 @@ %D Now load the needed \LUA\ code. \directlua{dofile(kpse.find_file('luatex-mplib.lua'))} +% \directlua{dofile(resolvers.findfile('luatex-mplib.lua'))} %D The following code takes care of encapsulating the literals: diff --git a/tex/generic/context/luatex/luatex-plain.tex b/tex/generic/context/luatex/luatex-plain.tex index 1ea8558e9..c9a9e36cf 100644 --- a/tex/generic/context/luatex/luatex-plain.tex +++ b/tex/generic/context/luatex/luatex-plain.tex @@ -20,6 +20,7 @@ \input {luatex-math}% \input {luatex-languages}% \input {luatex-mplib}% + % \input {luatex-gadgets}% } \edef\fmtversion{\fmtversion+luatex} diff --git a/tex/generic/context/luatex/luatex-test.tex b/tex/generic/context/luatex/luatex-test.tex index fbf8ce3cf..6f48e0ced 100644 --- a/tex/generic/context/luatex/luatex-test.tex +++ b/tex/generic/context/luatex/luatex-test.tex @@ -35,14 +35,16 @@ \font\gothic=msgothic(ms-gothic) {\gothic whatever} -\font\testy=file:IranNastaliq.ttf:mode=node;script=arab;language=dflt;+calt;+ccmp;+init;+isol;+medi;+fina;+liga;+rlig;+kern;+mark;+mkmk at 14pt -\testy این یک متن نمونه است با قلم ذر که درست آمده است. 
+\bgroup -\pdfprotrudechars2 \pdfadjustspacing2 + \pdfprotrudechars2 + \pdfadjustspacing2 -\font\testb=file:lmroman12-regular:+liga;extend=1.5 at 12pt \testb \input tufte \par -\font\testb=file:lmroman12-regular:+liga;slant=0.8 at 12pt \testb \input tufte \par -\font\testb=file:lmroman12-regular:+liga;protrusion=default at 12pt \testb \input tufte \par + \font\testb=file:lmroman12-regular:+liga;extend=1.5 at 12pt \testb \input tufte \par + \font\testb=file:lmroman12-regular:+liga;slant=0.8 at 12pt \testb \input tufte \par + \font\testb=file:lmroman12-regular:+liga;protrusion=default at 12pt \testb \input tufte \par + +\egroup \setmplibformat{plain} @@ -64,13 +66,12 @@ \font\test=dejavuserif:+kern at 10pt \test -\hsize 1mm -\noindent Циолковский +\bgroup \hsize 1mm \noindent Циолковский \par \egroup \loadpatterns{ru} -\noindent Циолковский +\bgroup \hsize 1mm \noindent Циолковский \par \egroup a bit of math @@ -84,4 +85,30 @@ $$\sqrt {2} { { {1} \over { {1} \over {x} } } } $$ \cows Hello World! +% math test + +\latinmodern + +\def\sqrt{\Uroot "0 "221A{}} + +\def\root#1\of{\Uroot "0 "221A{#1}} + +Inline $\sqrt{x}{1.2}$ math. % same for $\root n of x$ + +$\root3\of x$ + +$\sin{x}$ + +\lucidabright + +\def\sqrt{\Uroot "0 "221A{}} + +\def\root#1\of{\Uroot "0 "221A{#1}} + +Inline $\sqrt{x}{1.2}$ math. % same for $\root n of x$ + +$\root3\of x$ + +$\sin{x}$ + \end -- cgit v1.2.3