From c36e19abdfd15bf6cae6fa379c6ce51f3ef5332d Mon Sep 17 00:00:00 2001 From: Marius Date: Fri, 19 Oct 2012 01:21:22 +0300 Subject: beta 2012.10.19 00:06 --- tex/generic/context/luatex/luatex-basics-gen.lua | 4 +- tex/generic/context/luatex/luatex-fonts-ext.lua | 16 +- tex/generic/context/luatex/luatex-fonts-merged.lua | 7308 ++++++++++---------- tex/generic/context/luatex/luatex-fonts.lua | 2 +- tex/generic/context/luatex/luatex-mplib.tex | 11 +- 5 files changed, 3861 insertions(+), 3480 deletions(-) (limited to 'tex/generic') diff --git a/tex/generic/context/luatex/luatex-basics-gen.lua b/tex/generic/context/luatex/luatex-basics-gen.lua index bdbc3cf51..2f03efba8 100644 --- a/tex/generic/context/luatex/luatex-basics-gen.lua +++ b/tex/generic/context/luatex/luatex-basics-gen.lua @@ -87,7 +87,7 @@ local remapper = { function resolvers.findfile(name,fileformat) name = string.gsub(name,"\\","\/") fileformat = fileformat and string.lower(fileformat) - local found = kpse.find_file(name,(fileformat and fileformat ~= "" and (remapper[fileformat] or fileformat)) or file.extname(name,"tex")) + local found = kpse.find_file(name,(fileformat and fileformat ~= "" and (remapper[fileformat] or fileformat)) or file.suffix(name,"tex")) if not found or found == "" then found = kpse.find_file(name,"other text files") end @@ -96,7 +96,7 @@ end function resolvers.findbinfile(name,fileformat) if not fileformat or fileformat == "" then - fileformat = file.extname(name) -- string.match(name,"%.([^%.]-)$") + fileformat = file.suffix(name) -- string.match(name,"%.([^%.]-)$") end return resolvers.findfile(name,(fileformat and remapper[fileformat]) or fileformat) end diff --git a/tex/generic/context/luatex/luatex-fonts-ext.lua b/tex/generic/context/luatex/luatex-fonts-ext.lua index d8884ccc7..b60d04512 100644 --- a/tex/generic/context/luatex/luatex-fonts-ext.lua +++ b/tex/generic/context/luatex/luatex-fonts-ext.lua @@ -18,18 +18,14 @@ local otffeatures = fonts.constructors.newfeatures("otf") local function initializeitlc(tfmdata,value) if value then - -- the magic 40 and it formula come from Dohyun Kim - local parameters = tfmdata.parameters + -- the magic 40 and it formula come from Dohyun Kim but we might need another guess + local parameters = tfmdata.parameters local italicangle = parameters.italicangle if italicangle and italicangle ~= 0 then - local uwidth = (parameters.uwidth or 40)/2 - for unicode, d in next, tfmdata.descriptions do - local it = d.boundingbox[3] - d.width + uwidth - if it ~= 0 then - d.italic = it - end - end - tfmdata.properties.hasitalics = true + local properties = tfmdata.properties + local factor = tonumber(value) or 1 + properties.hasitalics = true + properties.autoitalicamount = factor * (parameters.uwidth or 40)/2 end end end diff --git a/tex/generic/context/luatex/luatex-fonts-merged.lua b/tex/generic/context/luatex/luatex-fonts-merged.lua index ea509c338..777defd88 100644 --- a/tex/generic/context/luatex/luatex-fonts-merged.lua +++ b/tex/generic/context/luatex/luatex-fonts-merged.lua @@ -1,6 +1,6 @@ -- merged file : luatex-fonts-merged.lua -- parent file : luatex-fonts.lua --- merge date : 05/30/12 11:26:34 +-- merge date : 10/19/12 00:06:05 do -- begin closure to overcome local limits and interference @@ -118,11 +118,28 @@ function string.topattern(str,lowercase,strict) end end + +function string.valid(str,default) + return (type(str) == "string" and str ~= "" and str) or default or nil +end + -- obsolete names: string.quote = string.quoted string.unquote = string.unquoted +-- handy fallback + +string.itself = function(s) return s end + +-- also handy (see utf variant) + +local pattern = Ct(C(1)^0) + +function string.totable(str) + return lpegmatch(pattern,str) +end + end -- closure do -- begin closure to overcome local limits and interference @@ -135,7 +152,8 @@ if not modules then modules = { } end modules ['l-table'] = { license = "see context related readme files" } -local type, next, tostring, tonumber, ipairs, table, string = type, next, tostring, tonumber, ipairs, table, string +local type, next, tostring, tonumber, ipairs = type, next, tostring, tonumber, ipairs +local table, string = table, string local concat, sort, insert, remove = table.concat, table.sort, table.insert, table.remove local format, find, gsub, lower, dump, match = string.format, string.find, string.gsub, string.lower, string.dump, string.match local getmetatable, setmetatable = getmetatable, setmetatable @@ -146,6 +164,8 @@ local getinfo = debug.getinfo -- impact on ConTeXt was not that large; the remaining ipairs already -- have been replaced. In a similar fashion we also hardly used pairs. -- +-- Hm, actually ipairs was retained, but we no longer use it anyway. +-- -- Just in case, we provide the fallbacks as discussed in Programming -- in Lua (http://www.lua.org/pil/7.3.html): @@ -205,12 +225,16 @@ function table.strip(tab) end function table.keys(t) - local keys, k = { }, 0 - for key, _ in next, t do - k = k + 1 - keys[k] = key + if t then + local keys, k = { }, 0 + for key, _ in next, t do + k = k + 1 + keys[k] = key + end + return keys + else + return { } end - return keys end local function compare(a,b) @@ -223,41 +247,49 @@ local function compare(a,b) end local function sortedkeys(tab) - local srt, category, s = { }, 0, 0 -- 0=unknown 1=string, 2=number 3=mixed - for key,_ in next, tab do - s = s + 1 - srt[s] = key - if category == 3 then - -- no further check - else - local tkey = type(key) - if tkey == "string" then - category = (category == 2 and 3) or 1 - elseif tkey == "number" then - category = (category == 1 and 3) or 2 + if tab then + local srt, category, s = { }, 0, 0 -- 0=unknown 1=string, 2=number 3=mixed + for key,_ in next, tab do + s = s + 1 + srt[s] = key + if category == 3 then + -- no further check else - category = 3 + local tkey = type(key) + if tkey == "string" then + category = (category == 2 and 3) or 1 + elseif tkey == "number" then + category = (category == 1 and 3) or 2 + else + category = 3 + end end end - end - if category == 0 or category == 3 then - sort(srt,compare) + if category == 0 or category == 3 then + sort(srt,compare) + else + sort(srt) + end + return srt else - sort(srt) + return { } end - return srt end local function sortedhashkeys(tab) -- fast one - local srt, s = { }, 0 - for key,_ in next, tab do - if key then - s= s + 1 - srt[s] = key + if tab then + local srt, s = { }, 0 + for key,_ in next, tab do + if key then + s= s + 1 + srt[s] = key + end end + sort(srt) + return srt + else + return { } end - sort(srt) - return srt end table.sortedkeys = sortedkeys @@ -282,7 +314,7 @@ end table.sortedhash = sortedhash table.sortedpairs = sortedhash -function table.append(t, list) +function table.append(t,list) local n = #t for i=1,#list do n = n + 1 @@ -517,12 +549,26 @@ local function do_serialize(root,name,depth,level,indexed) end -- we could check for k (index) being number (cardinal) if root and next(root) then - local first, last = nil, 0 -- #root cannot be trusted here (will be ok in 5.2 when ipairs is gone) + -- local first, last = nil, 0 -- #root cannot be trusted here (will be ok in 5.2 when ipairs is gone) + -- if compact then + -- -- NOT: for k=1,#root do (we need to quit at nil) + -- for k,v in ipairs(root) do -- can we use next? + -- if not first then first = k end + -- last = last + 1 + -- end + -- end + local first, last = nil, 0 if compact then - -- NOT: for k=1,#root do (we need to quit at nil) - for k,v in ipairs(root) do -- can we use next? - if not first then first = k end - last = last + 1 + last = #root + for k=1,last do +-- if not root[k] then + if root[k] == nil then + last = k - 1 + break + end + end + if last > 0 then + first = 1 end end local sk = sortedkeys(root) @@ -1014,23 +1060,27 @@ function table.reversed(t) end end -function table.sequenced(t,sep,simple) -- hash only - local s, n = { }, 0 - for k, v in sortedhash(t) do - if simple then - if v == true then - n = n + 1 - s[n] = k - elseif v and v~= "" then +function table.sequenced(t,sep) -- hash only + if t then + local s, n = { }, 0 + for k, v in sortedhash(t) do + if simple then + if v == true then + n = n + 1 + s[n] = k + elseif v and v~= "" then + n = n + 1 + s[n] = k .. "=" .. tostring(v) + end + else n = n + 1 s[n] = k .. "=" .. tostring(v) end - else - n = n + 1 - s[n] = k .. "=" .. tostring(v) end + return concat(s, sep or " | ") + else + return "" end - return concat(s, sep or " | ") end function table.print(t,...) @@ -1110,6 +1160,8 @@ local lpeg = require("lpeg") -- tracing (only used when we encounter a problem in integration of lpeg in luatex) +-- some code will move to unicode and string + local report = texio and texio.write_nl or print -- local lpmatch = lpeg.match @@ -1146,8 +1198,8 @@ local report = texio and texio.write_nl or print -- function lpeg.Cmt (l) local p = lpcmt (l) report("LPEG Cmt =") lpprint(l) return p end -- function lpeg.Carg (l) local p = lpcarg(l) report("LPEG Carg =") lpprint(l) return p end -local type = type -local byte, char, gmatch = string.byte, string.char, string.gmatch +local type, next = type, next +local byte, char, gmatch, format = string.byte, string.char, string.gmatch, string.format -- Beware, we predefine a bunch of patterns here and one reason for doing so -- is that we get consistent behaviour in some of the visualizers. @@ -1155,9 +1207,8 @@ local byte, char, gmatch = string.byte, string.char, string.gmatch lpeg.patterns = lpeg.patterns or { } -- so that we can share local patterns = lpeg.patterns -local P, R, S, V, match = lpeg.P, lpeg.R, lpeg.S, lpeg.V, lpeg.match -local Ct, C, Cs, Cc = lpeg.Ct, lpeg.C, lpeg.Cs, lpeg.Cc -local lpegtype = lpeg.type +local P, R, S, V, Ct, C, Cs, Cc, Cp = lpeg.P, lpeg.R, lpeg.S, lpeg.V, lpeg.Ct, lpeg.C, lpeg.Cs, lpeg.Cc, lpeg.Cp +local lpegtype, lpegmatch = lpeg.type, lpeg.match local utfcharacters = string.utfcharacters local utfgmatch = unicode and unicode.utf8.gmatch @@ -1208,6 +1259,10 @@ patterns.utf8char = utf8char patterns.validutf8 = validutf8char patterns.validutf8char = validutf8char +local eol = S("\n\r") +local spacer = S(" \t\f\v") -- + char(0xc2, 0xa0) if we want utf (cf mail roberto) +local whitespace = eol + spacer + patterns.digit = digit patterns.sign = sign patterns.cardinal = sign^0 * digit^1 @@ -1227,16 +1282,16 @@ patterns.letter = patterns.lowercase + patterns.uppercase patterns.space = space patterns.tab = P("\t") patterns.spaceortab = patterns.space + patterns.tab -patterns.eol = S("\n\r") -patterns.spacer = S(" \t\f\v") -- + char(0xc2, 0xa0) if we want utf (cf mail roberto) +patterns.eol = eol +patterns.spacer = spacer +patterns.whitespace = whitespace patterns.newline = newline patterns.emptyline = newline^1 -patterns.nonspacer = 1 - patterns.spacer -patterns.whitespace = patterns.eol + patterns.spacer -patterns.nonwhitespace = 1 - patterns.whitespace +patterns.nonspacer = 1 - spacer +patterns.nonwhitespace = 1 - whitespace patterns.equal = P("=") patterns.comma = P(",") -patterns.commaspacer = P(",") * patterns.spacer^0 +patterns.commaspacer = P(",") * spacer^0 patterns.period = P(".") patterns.colon = P(":") patterns.semicolon = P(";") @@ -1251,6 +1306,10 @@ patterns.undouble = (dquote/"") * patterns.nodquote * (dquote/"") patterns.unquoted = patterns.undouble + patterns.unsingle -- more often undouble patterns.unspacer = ((patterns.spacer^1)/"")^0 +patterns.singlequoted = squote * patterns.nosquote * squote +patterns.doublequoted = dquote * patterns.nodquote * dquote +patterns.quoted = patterns.doublequoted + patterns.singlequoted + patterns.somecontent = (anything - newline - space)^1 -- (utf8char - newline - space)^1 patterns.beginline = #(1-newline) @@ -1261,8 +1320,17 @@ patterns.beginline = #(1-newline) -- print(string.unquoted('"test"')) -- print(string.unquoted('"test"')) -function lpeg.anywhere(pattern) --slightly adapted from website - return P { P(pattern) + 1 * V(1) } -- why so complex? +local function anywhere(pattern) --slightly adapted from website + return P { P(pattern) + 1 * V(1) } +end + +lpeg.anywhere = anywhere + +function lpeg.instringchecker(p) + p = anywhere(p) + return function(str) + return lpegmatch(p,str) and true or false + end end function lpeg.splitter(pattern, action) @@ -1311,13 +1379,13 @@ function string.splitup(str,separator) if not separator then separator = "," end - return match(splitters_m[separator] or splitat(separator),str) + return lpegmatch(splitters_m[separator] or splitat(separator),str) end ---~ local p = splitat("->",false) print(match(p,"oeps->what->more")) -- oeps what more ---~ local p = splitat("->",true) print(match(p,"oeps->what->more")) -- oeps what->more ---~ local p = splitat("->",false) print(match(p,"oeps")) -- oeps ---~ local p = splitat("->",true) print(match(p,"oeps")) -- oeps +--~ local p = splitat("->",false) print(lpegmatch(p,"oeps->what->more")) -- oeps what more +--~ local p = splitat("->",true) print(lpegmatch(p,"oeps->what->more")) -- oeps what->more +--~ local p = splitat("->",false) print(lpegmatch(p,"oeps")) -- oeps +--~ local p = splitat("->",true) print(lpegmatch(p,"oeps")) -- oeps local cache = { } @@ -1327,16 +1395,20 @@ function lpeg.split(separator,str) c = tsplitat(separator) cache[separator] = c end - return match(c,str) + return lpegmatch(c,str) end function string.split(str,separator) - local c = cache[separator] - if not c then - c = tsplitat(separator) - cache[separator] = c + if separator then + local c = cache[separator] + if not c then + c = tsplitat(separator) + cache[separator] = c + end + return lpegmatch(c,str) + else + return { str } end - return match(c,str) end local spacing = patterns.spacer^0 * newline -- sort of strip @@ -1349,7 +1421,7 @@ patterns.textline = content --~ local linesplitter = Ct(content^0) --~ --~ function string.splitlines(str) ---~ return match(linesplitter,str) +--~ return lpegmatch(linesplitter,str) --~ end local linesplitter = tsplitat(newline) @@ -1357,7 +1429,7 @@ local linesplitter = tsplitat(newline) patterns.linesplitter = linesplitter function string.splitlines(str) - return match(linesplitter,str) + return lpegmatch(linesplitter,str) end local utflinesplitter = utfbom^-1 * tsplitat(newline) @@ -1365,7 +1437,58 @@ local utflinesplitter = utfbom^-1 * tsplitat(newline) patterns.utflinesplitter = utflinesplitter function string.utfsplitlines(str) - return match(utflinesplitter,str or "") + return lpegmatch(utflinesplitter,str or "") +end + +local utfcharsplitter_ows = utfbom^-1 * Ct(C(utf8char)^0) +local utfcharsplitter_iws = utfbom^-1 * Ct((whitespace^1 + C(utf8char))^0) + +function string.utfsplit(str,ignorewhitespace) -- new + if ignorewhitespace then + return lpegmatch(utfcharsplitter_iws,str or "") + else + return lpegmatch(utfcharsplitter_ows,str or "") + end +end + +-- inspect(string.utfsplit("a b c d")) +-- inspect(string.utfsplit("a b c d",true)) + +-- -- alternative 1: 0.77 +-- +-- local utfcharcounter = utfbom^-1 * Cs((utf8char/'!')^0) +-- +-- function string.utflength(str) +-- return #lpegmatch(utfcharcounter,str or "") +-- end +-- +-- -- alternative 2: 1.70 +-- +-- local n = 0 +-- +-- local utfcharcounter = utfbom^-1 * (utf8char/function() n = n + 1 end)^0 -- slow +-- +-- function string.utflength(str) +-- n = 0 +-- lpegmatch(utfcharcounter,str or "") +-- return n +-- end +-- +-- -- alternative 3: 0.24 (native unicode.utf8.len: 0.047) + +local n = 0 + +local utfcharcounter = utfbom^-1 * Cs ( ( + Cp() * (lpeg.patterns.utf8one )^1 * Cp() / function(f,t) n = n + t - f end + + Cp() * (lpeg.patterns.utf8two )^1 * Cp() / function(f,t) n = n + (t - f)/2 end + + Cp() * (lpeg.patterns.utf8three)^1 * Cp() / function(f,t) n = n + (t - f)/3 end + + Cp() * (lpeg.patterns.utf8four )^1 * Cp() / function(f,t) n = n + (t - f)/4 end +)^0 ) + +function string.utflength(str) + n = 0 + lpegmatch(utfcharcounter,str or "") + return n end --~ lpeg.splitters = cache -- no longer public @@ -1380,7 +1503,7 @@ function lpeg.checkedsplit(separator,str) c = Ct(separator^0 * other * (separator^1 * other)^0) cache[separator] = c end - return match(c,str) + return lpegmatch(c,str) end function string.checkedsplit(str,separator) @@ -1391,7 +1514,7 @@ function string.checkedsplit(str,separator) c = Ct(separator^0 * other * (separator^1 * other)^0) cache[separator] = c end - return match(c,str) + return lpegmatch(c,str) end --~ from roberto's site: @@ -1406,10 +1529,10 @@ patterns.utf8byte = utf8byte --~ local str = " a b c d " ---~ local s = lpeg.stripper(lpeg.R("az")) print("["..lpeg.match(s,str).."]") ---~ local s = lpeg.keeper(lpeg.R("az")) print("["..lpeg.match(s,str).."]") ---~ local s = lpeg.stripper("ab") print("["..lpeg.match(s,str).."]") ---~ local s = lpeg.keeper("ab") print("["..lpeg.match(s,str).."]") +--~ local s = lpeg.stripper(lpeg.R("az")) print("["..lpegmatch(s,str).."]") +--~ local s = lpeg.keeper(lpeg.R("az")) print("["..lpegmatch(s,str).."]") +--~ local s = lpeg.stripper("ab") print("["..lpegmatch(s,str).."]") +--~ local s = lpeg.keeper("ab") print("["..lpegmatch(s,str).."]") local cache = { } @@ -1442,11 +1565,11 @@ function lpeg.keeper(str) end function lpeg.frontstripper(str) -- or pattern (yet undocumented) - return (P(str) + P(true)) * Cs(P(1)^0) + return (P(str) + P(true)) * Cs(anything^0) end function lpeg.endstripper(str) -- or pattern (yet undocumented) - return Cs((1 - P(str) * P(-1))^0) + return Cs((1 - P(str) * endofstring)^0) end -- Just for fun I looked at the used bytecode and @@ -1455,8 +1578,22 @@ end function lpeg.replacer(one,two) if type(one) == "table" then local no = #one - if no > 0 then - local p + local p + if no == 0 then + for k, v in next, one do + local pp = P(k) / v + if p then + p = p + pp + else + p = pp + end + end + return Cs((p + 1)^0) + elseif no == 1 then + local o = one[1] + one, two = P(o[1]), o[2] + return Cs(((1-one)^1 + one/two)^0) + else for i=1,no do local o = one[i] local pp = P(o[1]) / o[2] @@ -1469,11 +1606,16 @@ function lpeg.replacer(one,two) return Cs((p + 1)^0) end else + one = P(one) two = two or "" - return Cs((P(one)/two + 1)^0) + return Cs(((1-one)^1 + one/two)^0) end end +-- print(lpeg.match(lpeg.replacer("e","a"),"test test")) +-- print(lpeg.match(lpeg.replacer{{"e","a"}},"test test")) +-- print(lpeg.match(lpeg.replacer({ e = "a", t = "x" }),"test test")) + local splitters_f, splitters_s = { }, { } function lpeg.firstofsplit(separator) -- always return value @@ -1501,14 +1643,14 @@ function lpeg.balancer(left,right) return P { left * ((1 - left - right) + V(1))^0 * right } end ---~ print(1,match(lpeg.firstofsplit(":"),"bc:de")) ---~ print(2,match(lpeg.firstofsplit(":"),":de")) -- empty ---~ print(3,match(lpeg.firstofsplit(":"),"bc")) ---~ print(4,match(lpeg.secondofsplit(":"),"bc:de")) ---~ print(5,match(lpeg.secondofsplit(":"),"bc:")) -- empty ---~ print(6,match(lpeg.secondofsplit(":",""),"bc")) ---~ print(7,match(lpeg.secondofsplit(":"),"bc")) ---~ print(9,match(lpeg.secondofsplit(":","123"),"bc")) +--~ print(1,lpegmatch(lpeg.firstofsplit(":"),"bc:de")) +--~ print(2,lpegmatch(lpeg.firstofsplit(":"),":de")) -- empty +--~ print(3,lpegmatch(lpeg.firstofsplit(":"),"bc")) +--~ print(4,lpegmatch(lpeg.secondofsplit(":"),"bc:de")) +--~ print(5,lpegmatch(lpeg.secondofsplit(":"),"bc:")) -- empty +--~ print(6,lpegmatch(lpeg.secondofsplit(":",""),"bc")) +--~ print(7,lpegmatch(lpeg.secondofsplit(":"),"bc")) +--~ print(9,lpegmatch(lpeg.secondofsplit(":","123"),"bc")) --~ -- slower: --~ @@ -1522,7 +1664,7 @@ local nany = utf8char/"" function lpeg.counter(pattern) pattern = Cs((P(pattern)/" " + nany)^0) return function(str) - return #match(pattern,str) + return #lpegmatch(pattern,str) end end @@ -1536,7 +1678,7 @@ if utfgmatch then end return n else -- 4 times slower but still faster than / function - return #match(Cs((P(what)/" " + nany)^0),str) + return #lpegmatch(Cs((P(what)/" " + nany)^0),str) end end @@ -1551,9 +1693,9 @@ else p = Cs((P(what)/" " + nany)^0) cache[p] = p end - return #match(p,str) + return #lpegmatch(p,str) else -- 4 times slower but still faster than / function - return #match(Cs((P(what)/" " + nany)^0),str) + return #lpegmatch(Cs((P(what)/" " + nany)^0),str) end end @@ -1580,7 +1722,7 @@ local p = Cs((S("-.+*%()[]") / patterns_escapes + anything)^0) local s = Cs((S("-.+*%()[]") / simple_escapes + anything)^0) function string.escapedpattern(str,simple) - return match(simple and s or p,str) + return lpegmatch(simple and s or p,str) end -- utf extensies @@ -1627,7 +1769,7 @@ else p = P(uc) end end - match((utf8char/f)^0,str) + lpegmatch((utf8char/f)^0,str) return p end @@ -1643,7 +1785,7 @@ function lpeg.UR(str,more) first = str last = more or first else - first, last = match(range,str) + first, last = lpegmatch(range,str) if not last then return P(str) end @@ -1679,11 +1821,15 @@ end --~ print(lpeg.count("äáàa",lpeg.UR("àá"))) --~ print(lpeg.count("äáàa",lpeg.UR(0x0000,0xFFFF))) -function lpeg.oneof(list,...) -- lpeg.oneof("elseif","else","if","then") +function lpeg.is_lpeg(p) + return p and lpegtype(p) == "pattern" +end + +function lpeg.oneof(list,...) -- lpeg.oneof("elseif","else","if","then") -- assume proper order if type(list) ~= "table" then list = { list, ... } end - -- sort(list) -- longest match first + -- table.sort(list) -- longest match first local p = P(list[1]) for l=2,#list do p = p + P(list[l]) @@ -1691,10 +1837,6 @@ function lpeg.oneof(list,...) -- lpeg.oneof("elseif","else","if","then") return p end -function lpeg.is_lpeg(p) - return p and lpegtype(p) == "pattern" -end - -- For the moment here, but it might move to utilities. Beware, we need to -- have the longest keyword first, so 'aaa' comes beforte 'aa' which is why we -- loop back from the end cq. prepend. @@ -1852,6 +1994,24 @@ end -- utfchar(0x205F), -- math thinspace -- } ) +-- handy from within tex: + +local lpegmatch = lpeg.match + +local replacer = lpeg.replacer("@","%%") -- Watch the escaped % in lpeg! + +function string.tformat(fmt,...) + return format(lpegmatch(replacer,fmt),...) +end + +-- strips leading and trailing spaces and collapsed all other spaces + +local pattern = Cs(whitespace^0/"" * ((whitespace^1 * P(-1) / "") + (whitespace^1/" ") + P(1))^0) + +function string.collapsespaces(str) + return lpegmatch(pattern,str) +end + end -- closure do -- begin closure to overcome local limits and interference @@ -1874,28 +2034,49 @@ function boolean.tonumber(b) end function toboolean(str,tolerant) - if tolerant then - local tstr = type(str) - if tstr == "string" then - return str == "true" or str == "yes" or str == "on" or str == "1" or str == "t" - elseif tstr == "number" then - return tonumber(str) ~= 0 - elseif tstr == "nil" then - return false - else - return str - end + if str == nil then + return false + elseif str == false then + return false + elseif str == true then + return true elseif str == "true" then return true elseif str == "false" then return false + elseif not tolerant then + return false + elseif str == 0 then + return false + elseif (tonumber(str) or 0) > 0 then + return true else - return str + return str == "yes" or str == "on" or str == "t" end end string.toboolean = toboolean +function string.booleanstring(str) + if str == nil then + return false + elseif str == false then + return false + elseif str == true then + return true + elseif str == "true" then + return true + elseif str == "false" then + return false + elseif str == 0 then + return false + elseif (tonumber(str) or 0) > 0 then + return true + else + return str == "yes" or str == "on" or str == "t" + end +end + function string.is_boolean(str,default) if type(str) == "string" then if str == "true" or str == "yes" or str == "on" or str == "t" then @@ -1986,7 +2167,7 @@ local function nameonly(name) return (gsub(match(name,"^.+[/\\](.-)$") or name,"%.[%a%d]+$","")) end -local function extname(name,default) +local function suffixonly(name,default) return match(name,"^.+%.([^/\\]-)$") or default or "" end @@ -1995,11 +2176,16 @@ local function splitname(name) return n or name, s or "" end -file.basename = basename -file.dirname = dirname -file.nameonly = nameonly -file.extname = extname -file.suffix = extname +file.basename = basename + +file.pathpart = dirname +file.dirname = dirname + +file.nameonly = nameonly + +file.suffixonly = suffixonly +file.extname = suffixonly -- obsolete +file.suffix = suffixonly function file.removesuffix(filename) return (gsub(filename,"%.[%a%d]+$","")) @@ -2145,6 +2331,11 @@ end file.isreadable = file.is_readable -- depricated file.iswritable = file.is_writable -- depricated +function file.size(name) + local a = attributes(name) + return a and a.size or 0 +end + -- todo: lpeg \\ / .. does not save much local checkedsplit = string.checkedsplit @@ -2275,7 +2466,7 @@ end --~ local pattern = (noslashes^0 * slashes)^0 * (noperiod^1 * period)^1 * C(noperiod^1) * -1 ---~ function file.extname(name) +--~ function file.suffixonly(name) --~ return lpegmatch(pattern,name) or "" --~ end @@ -2337,7 +2528,7 @@ end --~ end --~ end ---~ local test = file.extname +--~ local test = file.suffixonly --~ local test = file.basename --~ local test = file.dirname --~ local test = file.addsuffix @@ -2383,6 +2574,7 @@ local drive = C(R("az","AZ")) * P(":") local path = C(((1-slash)^0 * slash)^0) local suffix = period * C(P(1-period)^0 * P(-1)) local base = C((1-suffix)^0) +local rest = C(P(1)^0) drive = drive + Cc("") path = path + Cc("") @@ -2391,7 +2583,8 @@ suffix = suffix + Cc("") local pattern_a = drive * path * base * suffix local pattern_b = path * base * suffix -local pattern_c = C(drive * path) * C(base * suffix) +local pattern_c = C(drive * path) * C(base * suffix) -- trick: two extra captures +local pattern_d = path * rest function file.splitname(str,splitdrive) if splitdrive then @@ -2401,6 +2594,10 @@ function file.splitname(str,splitdrive) end end +function file.splitbase(str) + return lpegmatch(pattern_d,str) -- returns path, base+suffix +end + function file.nametotable(str,splitdrive) -- returns table local path, drive, subpath, name, base, suffix = lpegmatch(pattern_c,str) if splitdrive then @@ -2422,6 +2619,8 @@ function file.nametotable(str,splitdrive) -- returns table end end +-- print(file.splitbase("a/b/c.txt")) + -- function test(t) for k, v in next, t do print(v, "=>", file.splitname(v)) end end -- -- test { "c:", "c:/aa", "c:/aa/bb", "c:/aa/bb/cc", "c:/aa/bb/cc.dd", "c:/aa/bb/cc.dd.ee" } @@ -2468,14 +2667,14 @@ else io.fileseparator, io.pathseparator = "/" , ":" end -function io.loaddata(filename,textmode) +function io.loaddata(filename,textmode) -- return nil if empty local f = io.open(filename,(textmode and 'r') or 'rb') if f then local data = f:read('*all') f:close() - return data - else - return nil + if #data > 0 then + return data + end end end @@ -2497,6 +2696,45 @@ function io.savedata(filename,data,joiner) end end +function io.loadlines(filename,n) -- return nil if empty + local f = io.open(filename,'r') + if f then + if n then + local lines = { } + for i=1,n do + local line = f:read("*lines") + if line then + lines[#lines+1] = line + else + break + end + end + f:close() + lines = concat(lines,"\n") + if #lines > 0 then + return lines + end + else + local line = f:read("*line") or "" + assert(f:close()) + if #line > 0 then + return line + end + end + end +end + +function io.loadchunk(filename,n) + local f = io.open(filename,'rb') + if f then + local data = f:read(n or 1024) + f:close() + if #data > 0 then + return data + end + end +end + function io.exists(filename) local f = io.open(filename) if f == nil then @@ -2802,7 +3040,7 @@ local remapper = { function resolvers.findfile(name,fileformat) name = string.gsub(name,"\\","\/") fileformat = fileformat and string.lower(fileformat) - local found = kpse.find_file(name,(fileformat and fileformat ~= "" and (remapper[fileformat] or fileformat)) or file.extname(name,"tex")) + local found = kpse.find_file(name,(fileformat and fileformat ~= "" and (remapper[fileformat] or fileformat)) or file.suffix(name,"tex")) if not found or found == "" then found = kpse.find_file(name,"other text files") end @@ -2811,7 +3049,7 @@ end function resolvers.findbinfile(name,fileformat) if not fileformat or fileformat == "" then - fileformat = file.extname(name) -- string.match(name,"%.([^%.]-)$") + fileformat = file.suffix(name) -- string.match(name,"%.([^%.]-)$") end return resolvers.findfile(name,(fileformat and remapper[fileformat]) or fileformat) end @@ -3235,36 +3473,30 @@ if not modules then modules = { } end modules ['font-ini'] = { license = "see context related readme files" } --- basemethods -> can also be in list --- presetcontext -> defaults --- hashfeatures -> ctx version - --[[ldx--

Not much is happening here.

--ldx]]-- -local lower = string.lower -local allocate, mark = utilities.storage.allocate, utilities.storage.mark +local allocate = utilities.storage.allocate local report_defining = logs.reporter("fonts","defining") -fontloader.totable = fontloader.to_table - -fonts = fonts or { } -- already defined in context +fonts = fonts or { } local fonts = fonts --- some of these might move to where they are used first: - fonts.hashes = { identifiers = allocate() } + +fonts.tables = fonts.tables or { } +fonts.helpers = fonts.helpers or { } +fonts.tracers = fonts.tracers or { } -- for the moment till we have move to moduledata +fonts.specifiers = fonts.specifiers or { } -- in format ! + fonts.analyzers = { } -- not needed here fonts.readers = { } -fonts.tables = { } fonts.definers = { methods = { } } -fonts.specifiers = fonts.specifiers or { } -- in format ! fonts.loggers = { register = function() end } -fonts.helpers = { } -fonts.tracers = { } -- for the moment till we have move to moduledata +fontloader.totable = fontloader.to_table end -- closure @@ -3301,9 +3533,9 @@ local report_defining = logs.reporter("fonts","defining") --ldx]]-- local fonts = fonts -local constructors = { } +local constructors = fonts.constructors or { } fonts.constructors = constructors -local handlers = { } +local handlers = fonts.handlers or { } -- can have preloaded tables fonts.handlers = handlers local specifiers = fonts.specifiers @@ -3630,6 +3862,10 @@ function constructors.scale(tfmdata,specification) elseif forcedsize > 1000 then -- safeguard scaledpoints = forcedsize end + targetparameters.mathsize = mathsize -- context specific + targetparameters.textsize = textsize -- context specific + targetparameters.forcedsize = forcedsize -- context specific + targetparameters.extrafactor = extrafactor -- context specific -- local tounicode = resources.tounicode local defaultwidth = resources.defaultwidth or 0 @@ -4341,7 +4577,7 @@ setmetatableindex(formats, function(t,k) t[k] = l return l end - return rawget(t,file.extname(l)) + return rawget(t,file.suffix(l)) end) local locations = { } @@ -4438,19 +4674,31 @@ function constructors.getfeatureaction(what,where,mode,name) end end -function constructors.newfeatures(what) - local features = handlers[what].features +function constructors.newhandler(what) -- could be a metatable newindex + local handler = handlers[what] + if not handler then + handler = { } + handlers[what] = handler + end + return handler +end + +function constructors.newfeatures(what) -- could be a metatable newindex + local handler = handlers[what] + local features = handler.features if not features then - local tables = handlers[what].tables -- can be preloaded + local tables = handler.tables -- can be preloaded + local statistics = handler.statistics -- can be preloaded features = allocate { defaults = { }, descriptions = tables and tables.features or { }, + used = statistics and statistics.usedfeatures or { }, initializers = { base = { }, node = { } }, processors = { base = { }, node = { } }, manipulators = { base = { }, node = { } }, } features.register = function(specification) return register(features,specification) end - handlers[what].features = features -- will also become hidden + handler.features = features -- will also become hidden end return features end @@ -4652,17 +4900,17 @@ local format, match, lower = string.format, string.match, string.lower local tonumber = tonumber local P, S, R, C, V, lpegmatch = lpeg.P, lpeg.S, lpeg.R, lpeg.C, lpeg.V, lpeg.match -local trace_loading = false trackers.register("otf.loading", function(v) trace_loading = v end) +local fonts, logs, trackers = fonts, logs, trackers -local report_otf = logs.reporter("fonts","otf loading") +local trace_loading = false trackers.register("otf.loading", function(v) trace_loading = v end) -local fonts = fonts +local report_otf = logs.reporter("fonts","otf loading") -local cid = { } -fonts.cid = cid +local cid = { } +fonts.cid = cid -local cidmap = { } -local cidmax = 10 +local cidmap = { } +local cidmax = 10 -- original string parser: 0.109, lpeg parser: 0.036 seconds for Adobe-CNS1-4.cidmap -- @@ -4721,8 +4969,7 @@ local function loadcidfile(filename) end end -cid.loadfile = loadcidfile -- we use the frozen variant - +cid.loadfile = loadcidfile -- we use the frozen variant local template = "%s-%s-%s.cidmap" local function locate(registry,ordering,supplement) @@ -4818,18 +5065,20 @@ if not modules then modules = { } end modules ['font-map'] = { license = "see context related readme files" } +local tonumber = tonumber + local match, format, find, concat, gsub, lower = string.match, string.format, string.find, table.concat, string.gsub, string.lower local P, R, S, C, Ct, Cc, lpegmatch = lpeg.P, lpeg.R, lpeg.S, lpeg.C, lpeg.Ct, lpeg.Cc, lpeg.match local utfbyte = utf.byte -local trace_loading = false trackers.register("fonts.loading", function(v) trace_loading = v end) +local trace_loading = false trackers.register("fonts.loading", function(v) trace_loading = v end) local trace_mapping = false trackers.register("fonts.mapping", function(v) trace_unimapping = v end) local report_fonts = logs.reporter("fonts","loading") -- not otf only -local fonts = fonts -local mappings = { } -fonts.mappings = mappings +local fonts = fonts +local mappings = fonts.mappings or { } +fonts.mappings = mappings --[[ldx--

Eventually this code will disappear because map files are kind @@ -4852,7 +5101,7 @@ end local hex = R("AF","09") local hexfour = (hex*hex*hex*hex) / function(s) return tonumber(s,16) end local hexsix = (hex^1) / function(s) return tonumber(s,16) end -local dec = (R("09")^1) / tonumber +local dec = (R("09")^1) / tonumber local period = P(".") local unicode = P("uni") * (hexfour * (period + P(-1)) * Cc(false) + Ct(hexfour^1) * Cc(true)) local ucode = P("u") * (hexsix * (period + P(-1)) * Cc(false) + Ct(hexsix ^1) * Cc(true)) @@ -5263,15 +5512,16 @@ if not modules then modules = { } end modules ['font-oti'] = { local lower = string.lower -local allocate = utilities.storage.allocate - local fonts = fonts -local otf = { } -fonts.handlers.otf = otf +local constructors = fonts.constructors -local otffeatures = fonts.constructors.newfeatures("otf") +local otf = constructors.newhandler("otf") +local otffeatures = constructors.newfeatures("otf") +local otftables = otf.tables local registerotffeature = otffeatures.register +local allocate = utilities.storage.allocate + registerotffeature { name = "features", description = "initialization of feature handler", @@ -5280,8 +5530,6 @@ registerotffeature { -- these are later hooked into node and base initializaters -local otftables = otf.tables -- not always defined - local function setmode(tfmdata,value) if value then tfmdata.properties.mode = lower(value) @@ -6466,7 +6714,7 @@ actions["prepare lookups"] = function(data,filename,raw) end -- The reverse handler does a bit redundant splitting but it's seldom --- seen so we don' tbother too much. We could store the replacement +-- seen so we don't bother too much. We could store the replacement -- in the current list (value instead of true) but it makes other code -- uglier. Maybe some day. @@ -6484,6 +6732,22 @@ local function t_uncover(splitter,cache,covers) return result end +local function s_uncover(splitter,cache,cover) + if cover == "" then + return nil + else + local uncovered = cache[cover] + if not uncovered then + uncovered = lpegmatch(splitter,cover) +-- for i=1,#uncovered do +-- uncovered[i] = { [uncovered[i]] = true } +-- end + cache[cover] = uncovered + end + return { uncovered } + end +end + local function t_hashed(t,cache) if t then local ht = { } @@ -6505,22 +6769,6 @@ local function t_hashed(t,cache) end end -local function s_uncover(splitter,cache,cover) - if cover == "" then - return nil - else - local uncovered = cache[cover] - if not uncovered then - uncovered = lpegmatch(splitter,cover) - for i=1,#uncovered do - uncovered[i] = { [uncovered[i]] = true } - end - cache[cover] = uncovered - end - return uncovered - end -end - local s_hashed = t_hashed local function r_uncover(splitter,cache,cover,replacements) @@ -6545,11 +6793,15 @@ local function r_uncover(splitter,cache,cover,replacements) end end -actions["reorganize lookups"] = function(data,filename,raw) +actions["reorganize lookups"] = function(data,filename,raw) -- we could check for "" and n == 0 -- we prefer the before lookups in a normal order if data.lookups then local splitter = data.helpers.tounicodetable - local cache, h_cache = { }, { } + local t_u_cache = { } + local s_u_cache = t_u_cache -- string keys + local t_h_cache = { } + local s_h_cache = t_h_cache -- table keys (so we could use one cache) + local r_u_cache = { } -- maybe shared for _, lookup in next, data.lookups do local rules = lookup.rules if rules then @@ -6557,15 +6809,15 @@ actions["reorganize lookups"] = function(data,filename,raw) if format == "class" then local before_class = lookup.before_class if before_class then - before_class = t_uncover(splitter,cache,reversed(before_class)) + before_class = t_uncover(splitter,t_u_cache,reversed(before_class)) end local current_class = lookup.current_class if current_class then - current_class = t_uncover(splitter,cache,current_class) + current_class = t_uncover(splitter,t_u_cache,current_class) end local after_class = lookup.after_class if after_class then - after_class = t_uncover(splitter,cache,after_class) + after_class = t_uncover(splitter,t_u_cache,after_class) end for i=1,#rules do local rule = rules[i] @@ -6575,7 +6827,7 @@ actions["reorganize lookups"] = function(data,filename,raw) for i=1,#before do before[i] = before_class[before[i]] or { } end - rule.before = t_hashed(before,h_cache) + rule.before = t_hashed(before,t_h_cache) end local current = class.current local lookups = rule.lookups @@ -6586,14 +6838,14 @@ actions["reorganize lookups"] = function(data,filename,raw) lookups[i] = false -- e.g. we can have two lookups and one replacement end end - rule.current = t_hashed(current,h_cache) + rule.current = t_hashed(current,t_h_cache) end local after = class.after if after then for i=1,#after do after[i] = after_class[after[i]] or { } end - rule.after = t_hashed(after,h_cache) + rule.after = t_hashed(after,t_h_cache) end rule.class = nil end @@ -6608,18 +6860,18 @@ actions["reorganize lookups"] = function(data,filename,raw) if coverage then local before = coverage.before if before then - before = t_uncover(splitter,cache,reversed(before)) - rule.before = t_hashed(before,h_cache) + before = t_uncover(splitter,t_u_cache,reversed(before)) + rule.before = t_hashed(before,t_h_cache) end local current = coverage.current if current then - current = t_uncover(splitter,cache,current) - rule.current = t_hashed(current,h_cache) + current = t_uncover(splitter,t_u_cache,current) + rule.current = t_hashed(current,t_h_cache) end local after = coverage.after if after then - after = t_uncover(splitter,cache,after) - rule.after = t_hashed(after,h_cache) + after = t_uncover(splitter,t_u_cache,after) + rule.after = t_hashed(after,t_h_cache) end rule.coverage = nil end @@ -6631,22 +6883,22 @@ actions["reorganize lookups"] = function(data,filename,raw) if reversecoverage then local before = reversecoverage.before if before then - before = t_uncover(splitter,cache,reversed(before)) - rule.before = t_hashed(before,h_cache) + before = t_uncover(splitter,t_u_cache,reversed(before)) + rule.before = t_hashed(before,t_h_cache) end local current = reversecoverage.current if current then - current = t_uncover(splitter,cache,current) - rule.current = t_hashed(current,h_cache) + current = t_uncover(splitter,t_u_cache,current) + rule.current = t_hashed(current,t_h_cache) end local after = reversecoverage.after if after then - after = t_uncover(splitter,cache,after) - rule.after = t_hashed(after,h_cache) + after = t_uncover(splitter,t_u_cache,after) + rule.after = t_hashed(after,t_h_cache) end local replacements = reversecoverage.replacements if replacements then - rule.replacements = r_uncover(splitter,cache,current,replacements) + rule.replacements = r_uncover(splitter,r_u_cache,current,replacements) end rule.reversecoverage = nil end @@ -6657,19 +6909,19 @@ actions["reorganize lookups"] = function(data,filename,raw) local glyphs = rule.glyphs if glyphs then local fore = glyphs.fore - if fore then - fore = s_uncover(splitter,cache,fore) - rule.before = s_hashed(fore,h_cache) + if fore and fore ~= "" then + fore = s_uncover(splitter,s_u_cache,fore) + rule.before = s_hashed(fore,s_h_cache) end local back = glyphs.back if back then - back = s_uncover(splitter,cache,back) - rule.after = s_hashed(back,h_cache) + back = s_uncover(splitter,s_u_cache,back) + rule.after = s_hashed(back,s_h_cache) end local names = glyphs.names if names then - names = s_uncover(splitter,cache,names) - rule.current = s_hashed(names,h_cache) + names = s_uncover(splitter,s_u_cache,names) + rule.current = s_hashed(names,s_h_cache) end rule.glyphs = nil end @@ -7324,7 +7576,7 @@ local function read_from_otf(specification) local allfeatures = tfmdata.shared.features or specification.features.normal constructors.applymanipulators("otf",tfmdata,allfeatures,trace_features,report_otf) constructors.setname(tfmdata,specification) -- only otf? - fonts.loggers.register(tfmdata,file.extname(specification.filename),specification) + fonts.loggers.register(tfmdata,file.suffix(specification.filename),specification) end return tfmdata end @@ -7448,26 +7700,27 @@ local type, next, tonumber, tostring = type, next, tonumber, tostring local lpegmatch = lpeg.match local utfchar = utf.char -local trace_baseinit = false trackers.register("otf.baseinit", function(v) trace_baseinit = v end) -local trace_singles = false trackers.register("otf.singles", function(v) trace_singles = v end) -local trace_multiples = false trackers.register("otf.multiples", function(v) trace_multiples = v end) -local trace_alternatives = false trackers.register("otf.alternatives", function(v) trace_alternatives = v end) -local trace_ligatures = false trackers.register("otf.ligatures", function(v) trace_ligatures = v end) -local trace_kerns = false trackers.register("otf.kerns", function(v) trace_kerns = v end) -local trace_preparing = false trackers.register("otf.preparing", function(v) trace_preparing = v end) +local trace_baseinit = false trackers.register("otf.baseinit", function(v) trace_baseinit = v end) +local trace_singles = false trackers.register("otf.singles", function(v) trace_singles = v end) +local trace_multiples = false trackers.register("otf.multiples", function(v) trace_multiples = v end) +local trace_alternatives = false trackers.register("otf.alternatives", function(v) trace_alternatives = v end) +local trace_ligatures = false trackers.register("otf.ligatures", function(v) trace_ligatures = v end) +local trace_ligatures_detail = false trackers.register("otf.ligatures.detail", function(v) trace_ligatures_detail = v end) +local trace_kerns = false trackers.register("otf.kerns", function(v) trace_kerns = v end) +local trace_preparing = false trackers.register("otf.preparing", function(v) trace_preparing = v end) -local report_prepare = logs.reporter("fonts","otf prepare") +local report_prepare = logs.reporter("fonts","otf prepare") -local fonts = fonts -local otf = fonts.handlers.otf +local fonts = fonts +local otf = fonts.handlers.otf -local otffeatures = fonts.constructors.newfeatures("otf") -local registerotffeature = otffeatures.register +local otffeatures = otf.features +local registerotffeature = otffeatures.register -otf.defaultbasealternate = "none" -- first last +otf.defaultbasealternate = "none" -- first last -local wildcard = "*" -local default = "dflt" +local wildcard = "*" +local default = "dflt" local function gref(descriptions,n) if type(n) == "number" then @@ -7602,7 +7855,7 @@ local function finalize_ligatures(tfmdata,ligatures) if ligature then local unicode, lookupdata = ligature[1], ligature[2] if trace then - print("BUILDING",concat(lookupdata," "),unicode) + trace_ligatures_detail("building %q into %q",concat(lookupdata," "),unicode) end local size = #lookupdata local firstcode = lookupdata[1] -- [2] @@ -7615,7 +7868,7 @@ local function finalize_ligatures(tfmdata,ligatures) if not firstdata then firstcode = private if trace then - print(" DEFINING",firstname,firstcode) + trace_ligatures_detail("defining %q as %q",firstname,firstcode) end unicodes[firstname] = firstcode firstdata = { intermediate = true, ligatures = { } } @@ -7639,7 +7892,7 @@ local function finalize_ligatures(tfmdata,ligatures) end end if trace then - print("CODES",firstname,firstcode,secondname,secondcode,target) + trace_ligatures_detail("codes (%s,%s) + (%s,%s) -> %s",firstname,firstcode,secondname,secondcode,target) end local firstligs = firstdata.ligatures if firstligs then @@ -8112,7 +8365,6 @@ local traverse_id = node.traverse_id local unset_attribute = node.unset_attribute local has_attribute = node.has_attribute local set_attribute = node.set_attribute -local copy_node = node.copy local insert_node_before = node.insert_before local insert_node_after = node.insert_after @@ -8124,27 +8376,16 @@ local curscurs = attributes.private('curscurs') local cursdone = attributes.private('cursdone') local kernpair = attributes.private('kernpair') local ligacomp = attributes.private('ligacomp') -local fontkern = attributes.private('fontkern') - -if context then - - local kern = nodes.pool.register(newkern()) - - set_attribute(kern,fontkern,1) -- we can have several, attributes are shared - - newkern = function(k) - local c = copy_node(kern) - c.kern = k - return c - end - -end -- This injector has been tested by Idris Samawi Hamid (several arabic fonts as well as -- the rather demanding Husayni font), Khaled Hosny (latin and arabic) and Kaj Eigner -- (arabic, hebrew and thai) and myself (whatever font I come across). I'm pretty sure -- that this code is not 100% okay but examples are needed to figure things out. +function injections.installnewkern(nk) + newkern = nk or newkern +end + local cursives = { } local marks = { } local kerns = { } @@ -8430,16 +8671,13 @@ function injections.handler(head,where,keep) -- new per 2010-10-06, width adapted per 2010-02-03 -- we used to negate the width of marks because in tfm -- that makes sense but we no longer do that so as a - -- consequence the sign of p.width was changed (we need - -- to keep an eye on it as we don't have that many fonts - -- that enter this branch .. I'm still not sure if this - -- one is right + -- consequence the sign of p.width was changed local k = wx[p] if k then - n.xoffset = p.xoffset + p.width + d[1] - k[2] + -- brill roman: A\char"0300 (but ugly anyway) + n.xoffset = p.xoffset - p.width + d[1] - k[2] -- was + p.width else - -- n.xoffset = p.xoffset + p.width + d[1] - -- lucida U\char"032F (default+mark) + -- lucida: U\char"032F (default+mark) n.xoffset = p.xoffset - p.width + d[1] -- 01-05-2011 end else @@ -8579,1328 +8817,1180 @@ end -- closure do -- begin closure to overcome local limits and interference -if not modules then modules = { } end modules ['font-otn'] = { +if not modules then modules = { } end modules ['font-ota'] = { version = 1.001, - comment = "companion to font-ini.mkiv", + comment = "companion to font-otf.lua (analysing)", author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", copyright = "PRAGMA ADE / ConTeXt Development Team", license = "see context related readme files" } --- this is still somewhat preliminary and it will get better in due time; --- much functionality could only be implemented thanks to the husayni font --- of Idris Samawi Hamid to who we dedicate this module. +-- this might become scrp-*.lua --- in retrospect it always looks easy but believe it or not, it took a lot --- of work to get proper open type support done: buggy fonts, fuzzy specs, --- special made testfonts, many skype sessions between taco, idris and me, --- torture tests etc etc ... unfortunately the code does not show how much --- time it took ... +local type, tostring, match, format, concat = type, tostring, string.match, string.format, table.concat --- todo: --- --- kerning is probably not yet ok for latin around dics nodes --- extension infrastructure (for usage out of context) --- sorting features according to vendors/renderers --- alternative loop quitters --- check cursive and r2l --- find out where ignore-mark-classes went --- default features (per language, script) --- handle positions (we need example fonts) --- handle gpos_single (we might want an extra width field in glyph nodes because adding kerns might interfere) --- mark (to mark) code is still not what it should be (too messy but we need some more extreem husayni tests) +if not trackers then trackers = { register = function() end } end ---[[ldx-- -

This module is a bit more split up that I'd like but since we also want to test -with plain it has to be so. This module is part of -and discussion about improvements and functionality mostly happens on the - mailing list.

- -

The specification of OpenType is kind of vague. Apart from a lack of a proper -free specifications there's also the problem that Microsoft and Adobe -may have their own interpretation of how and in what order to apply features. -In general the Microsoft website has more detailed specifications and is a -better reference. There is also some information in the FontForge help files.

- -

Because there is so much possible, fonts might contain bugs and/or be made to -work with certain rederers. These may evolve over time which may have the side -effect that suddenly fonts behave differently.

+local trace_analyzing = false trackers.register("otf.analyzing", function(v) trace_analyzing = v end) -

After a lot of experiments (mostly by Taco, me and Idris) we're now at yet another -implementation. Of course all errors are mine and of course the code can be -improved. There are quite some optimizations going on here and processing speed -is currently acceptable. Not all functions are implemented yet, often because I -lack the fonts for testing. Many scripts are not yet supported either, but I will -look into them as soon as users ask for it.

+local fonts, nodes, node = fonts, nodes, node -

Because there are different interpretations possible, I will extend the code -with more (configureable) variants. I can also add hooks for users so that they can -write their own extensions.

+local allocate = utilities.storage.allocate -

Glyphs are indexed not by unicode but in their own way. This is because there is no -relationship with unicode at all, apart from the fact that a font might cover certain -ranges of characters. One character can have multiple shapes. However, at the - end we use unicode so and all extra glyphs are mapped into a private -space. This is needed because we need to access them and has to include -then in the output eventually.

+local otf = fonts.handlers.otf -

The raw table as it coms from gets reorganized in to fit out needs. -In that table is packed (similar tables are shared) and cached on disk -so that successive runs can use the optimized table (after loading the table is -unpacked). The flattening code used later is a prelude to an even more compact table -format (and as such it keeps evolving).

+local analyzers = fonts.analyzers +local initializers = allocate() +local methods = allocate() -

This module is sparsely documented because it is a moving target. The table format -of the reader changes and we experiment a lot with different methods for supporting -features.

+analyzers.initializers = initializers +analyzers.methods = methods +analyzers.useunicodemarks = false -

As with the code, we may decide to store more information in the - table.

+local nodecodes = nodes.nodecodes +local glyph_code = nodecodes.glyph -

Incrementing the version number will force a re-cache. We jump the number by one -when there's a fix in the library or code that -results in different tables.

---ldx]]-- +local set_attribute = node.set_attribute +local has_attribute = node.has_attribute +local traverse_id = node.traverse_id +local traverse_node_list = node.traverse --- action handler chainproc chainmore comment --- --- gsub_single ok ok ok --- gsub_multiple ok ok not implemented yet --- gsub_alternate ok ok not implemented yet --- gsub_ligature ok ok ok --- gsub_context ok -- --- gsub_contextchain ok -- --- gsub_reversecontextchain ok -- --- chainsub -- ok --- reversesub -- ok --- gpos_mark2base ok ok --- gpos_mark2ligature ok ok --- gpos_mark2mark ok ok --- gpos_cursive ok untested --- gpos_single ok ok --- gpos_pair ok ok --- gpos_context ok -- --- gpos_contextchain ok -- --- --- todo: contextpos and contextsub and class stuff --- --- actions: --- --- handler : actions triggered by lookup --- chainproc : actions triggered by contextual lookup --- chainmore : multiple substitutions triggered by contextual lookup (e.g. fij -> f + ij) --- --- remark: the 'not implemented yet' variants will be done when we have fonts that use them --- remark: we need to check what to do with discretionaries +local fontdata = fonts.hashes.identifiers +local state = attributes.private('state') +local categories = characters and characters.categories or { } -- sorry, only in context --- We used to have independent hashes for lookups but as the tags are unique --- we now use only one hash. If needed we can have multiple again but in that --- case I will probably prefix (i.e. rename) the lookups in the cached font file. +local otffeatures = fonts.constructors.newfeatures("otf") +local registerotffeature = otffeatures.register -local concat, insert, remove = table.concat, table.insert, table.remove -local format, gmatch, gsub, find, match, lower, strip = string.format, string.gmatch, string.gsub, string.find, string.match, string.lower, string.strip -local type, next, tonumber, tostring = type, next, tonumber, tostring -local lpegmatch = lpeg.match -local random = math.random +--[[ldx-- +

Analyzers run per script and/or language and are needed in order to +process features right.

+--ldx]]-- -local logs, trackers, nodes, attributes = logs, trackers, nodes, attributes +analyzers.constants = { + init = 1, + medi = 2, + fina = 3, + isol = 4, + -- devanagari + rphf = 5, + half = 6, + pref = 7, + blwf = 8, + pstf = 9, +} -local registertracker = trackers.register +-- todo: analyzers per script/lang, cross font, so we need an font id hash -> script +-- e.g. latin -> hyphenate, arab -> 1/2/3 analyze -- its own namespace -local fonts = fonts -local otf = fonts.handlers.otf +function analyzers.setstate(head,font) + local useunicodemarks = analyzers.useunicodemarks + local tfmdata = fontdata[font] + local characters = tfmdata.characters + local descriptions = tfmdata.descriptions + local first, last, current, n, done = nil, nil, head, 0, false -- maybe make n boolean + while current do + local id = current.id + if id == glyph_code and current.font == font then + local char = current.char + local d = descriptions[char] + if d then + if d.class == "mark" or (useunicodemarks and categories[char] == "mn") then + done = true + set_attribute(current,state,5) -- mark + elseif n == 0 then + first, last, n = current, current, 1 + set_attribute(current,state,1) -- init + else + last, n = current, n+1 + set_attribute(current,state,2) -- medi + end + else -- finish + if first and first == last then + set_attribute(last,state,4) -- isol + elseif last then + set_attribute(last,state,3) -- fina + end + first, last, n = nil, nil, 0 + end + elseif id == disc_code then + -- always in the middle + set_attribute(current,state,2) -- midi + last = current + else -- finish + if first and first == last then + set_attribute(last,state,4) -- isol + elseif last then + set_attribute(last,state,3) -- fina + end + first, last, n = nil, nil, 0 + end + current = current.next + end + if first and first == last then + set_attribute(last,state,4) -- isol + elseif last then + set_attribute(last,state,3) -- fina + end + return head, done +end -local trace_lookups = false registertracker("otf.lookups", function(v) trace_lookups = v end) -local trace_singles = false registertracker("otf.singles", function(v) trace_singles = v end) -local trace_multiples = false registertracker("otf.multiples", function(v) trace_multiples = v end) -local trace_alternatives = false registertracker("otf.alternatives", function(v) trace_alternatives = v end) -local trace_ligatures = false registertracker("otf.ligatures", function(v) trace_ligatures = v end) -local trace_contexts = false registertracker("otf.contexts", function(v) trace_contexts = v end) -local trace_marks = false registertracker("otf.marks", function(v) trace_marks = v end) -local trace_kerns = false registertracker("otf.kerns", function(v) trace_kerns = v end) -local trace_cursive = false registertracker("otf.cursive", function(v) trace_cursive = v end) -local trace_preparing = false registertracker("otf.preparing", function(v) trace_preparing = v end) -local trace_bugs = false registertracker("otf.bugs", function(v) trace_bugs = v end) -local trace_details = false registertracker("otf.details", function(v) trace_details = v end) -local trace_applied = false registertracker("otf.applied", function(v) trace_applied = v end) -local trace_steps = false registertracker("otf.steps", function(v) trace_steps = v end) -local trace_skips = false registertracker("otf.skips", function(v) trace_skips = v end) -local trace_directions = false registertracker("otf.directions", function(v) trace_directions = v end) +-- in the future we will use language/script attributes instead of the +-- font related value, but then we also need dynamic features which is +-- somewhat slower; and .. we need a chain of them -local report_direct = logs.reporter("fonts","otf direct") -local report_subchain = logs.reporter("fonts","otf subchain") -local report_chain = logs.reporter("fonts","otf chain") -local report_process = logs.reporter("fonts","otf process") -local report_prepare = logs.reporter("fonts","otf prepare") +local function analyzeinitializer(tfmdata,value) -- attr + local script, language = otf.scriptandlanguage(tfmdata) -- attr + local action = initializers[script] + if not action then + -- skip + elseif type(action) == "function" then + return action(tfmdata,value) + else + local action = action[language] + if action then + return action(tfmdata,value) + end + end +end -registertracker("otf.verbose_chain", function(v) otf.setcontextchain(v and "verbose") end) -registertracker("otf.normal_chain", function(v) otf.setcontextchain(v and "normal") end) +local function analyzeprocessor(head,font,attr) + local tfmdata = fontdata[font] + local script, language = otf.scriptandlanguage(tfmdata,attr) + local action = methods[script] + if not action then + -- skip + elseif type(action) == "function" then + return action(head,font,attr) + else + action = action[language] + if action then + return action(head,font,attr) + end + end + return head, false +end -registertracker("otf.replacements", "otf.singles,otf.multiples,otf.alternatives,otf.ligatures") -registertracker("otf.positions","otf.marks,otf.kerns,otf.cursive") -registertracker("otf.actions","otf.replacements,otf.positions") -registertracker("otf.injections","nodes.injections") +registerotffeature { + name = "analyze", + description = "analysis of (for instance) character classes", + default = true, + initializers = { + node = analyzeinitializer, + }, + processors = { + position = 1, + node = analyzeprocessor, + } +} -registertracker("*otf.sample","otf.steps,otf.actions,otf.analyzing") +-- latin -local insert_node_after = node.insert_after -local delete_node = nodes.delete -local copy_node = node.copy -local find_node_tail = node.tail or node.slide -local set_attribute = node.set_attribute -local has_attribute = node.has_attribute -local flush_node_list = node.flush_list +methods.latn = analyzers.setstate -local setmetatableindex = table.setmetatableindex +-- this info eventually will go into char-def and we will have a state +-- table for generic then -local zwnj = 0x200C -local zwj = 0x200D -local wildcard = "*" -local default = "dflt" +local zwnj = 0x200C +local zwj = 0x200D -local nodecodes = nodes.nodecodes -local whatcodes = nodes.whatcodes -local glyphcodes = nodes.glyphcodes +local isol = { + [0x0600] = true, [0x0601] = true, [0x0602] = true, [0x0603] = true, + [0x0608] = true, [0x060B] = true, [0x0621] = true, [0x0674] = true, + [0x06DD] = true, [zwnj] = true, +} -local glyph_code = nodecodes.glyph -local glue_code = nodecodes.glue -local disc_code = nodecodes.disc -local whatsit_code = nodecodes.whatsit - -local dir_code = whatcodes.dir -local localpar_code = whatcodes.localpar - -local ligature_code = glyphcodes.ligature - -local privateattribute = attributes.private - --- Something is messed up: we have two mark / ligature indices, one at the injection --- end and one here ... this is bases in KE's patches but there is something fishy --- there as I'm pretty sure that for husayni we need some connection (as it's much --- more complex than an average font) but I need proper examples of all cases, not --- of only some. - -local state = privateattribute('state') -local markbase = privateattribute('markbase') -local markmark = privateattribute('markmark') -local markdone = privateattribute('markdone') -- assigned at the injection end -local cursbase = privateattribute('cursbase') -local curscurs = privateattribute('curscurs') -local cursdone = privateattribute('cursdone') -local kernpair = privateattribute('kernpair') -local ligacomp = privateattribute('ligacomp') -- assigned here (ideally it should be combined) - -local injections = nodes.injections -local setmark = injections.setmark -local setcursive = injections.setcursive -local setkern = injections.setkern -local setpair = injections.setpair - -local markonce = true -local cursonce = true -local kernonce = true +local isol_fina = { + [0x0622] = true, [0x0623] = true, [0x0624] = true, [0x0625] = true, + [0x0627] = true, [0x0629] = true, [0x062F] = true, [0x0630] = true, + [0x0631] = true, [0x0632] = true, [0x0648] = true, [0x0671] = true, + [0x0672] = true, [0x0673] = true, [0x0675] = true, [0x0676] = true, + [0x0677] = true, [0x0688] = true, [0x0689] = true, [0x068A] = true, + [0x068B] = true, [0x068C] = true, [0x068D] = true, [0x068E] = true, + [0x068F] = true, [0x0690] = true, [0x0691] = true, [0x0692] = true, + [0x0693] = true, [0x0694] = true, [0x0695] = true, [0x0696] = true, + [0x0697] = true, [0x0698] = true, [0x0699] = true, [0x06C0] = true, + [0x06C3] = true, [0x06C4] = true, [0x06C5] = true, [0x06C6] = true, + [0x06C7] = true, [0x06C8] = true, [0x06C9] = true, [0x06CA] = true, + [0x06CB] = true, [0x06CD] = true, [0x06CF] = true, [0x06D2] = true, + [0x06D3] = true, [0x06D5] = true, [0x06EE] = true, [0x06EF] = true, + [0x0759] = true, [0x075A] = true, [0x075B] = true, [0x076B] = true, + [0x076C] = true, [0x0771] = true, [0x0773] = true, [0x0774] = true, + [0x0778] = true, [0x0779] = true, [0xFEF5] = true, [0xFEF7] = true, + [0xFEF9] = true, [0xFEFB] = true, -local fonthashes = fonts.hashes -local fontdata = fonthashes.identifiers + -- syriac -local otffeatures = fonts.constructors.newfeatures("otf") -local registerotffeature = otffeatures.register + [0x0710] = true, [0x0715] = true, [0x0716] = true, [0x0717] = true, + [0x0718] = true, [0x0719] = true, [0x0728] = true, [0x072A] = true, + [0x072C] = true, [0x071E] = true, +} -local onetimemessage = fonts.loggers.onetimemessage +local isol_fina_medi_init = { + [0x0626] = true, [0x0628] = true, [0x062A] = true, [0x062B] = true, + [0x062C] = true, [0x062D] = true, [0x062E] = true, [0x0633] = true, + [0x0634] = true, [0x0635] = true, [0x0636] = true, [0x0637] = true, + [0x0638] = true, [0x0639] = true, [0x063A] = true, [0x063B] = true, + [0x063C] = true, [0x063D] = true, [0x063E] = true, [0x063F] = true, + [0x0640] = true, [0x0641] = true, [0x0642] = true, [0x0643] = true, + [0x0644] = true, [0x0645] = true, [0x0646] = true, [0x0647] = true, + [0x0649] = true, [0x064A] = true, [0x066E] = true, [0x066F] = true, + [0x0678] = true, [0x0679] = true, [0x067A] = true, [0x067B] = true, + [0x067C] = true, [0x067D] = true, [0x067E] = true, [0x067F] = true, + [0x0680] = true, [0x0681] = true, [0x0682] = true, [0x0683] = true, + [0x0684] = true, [0x0685] = true, [0x0686] = true, [0x0687] = true, + [0x069A] = true, [0x069B] = true, [0x069C] = true, [0x069D] = true, + [0x069E] = true, [0x069F] = true, [0x06A0] = true, [0x06A1] = true, + [0x06A2] = true, [0x06A3] = true, [0x06A4] = true, [0x06A5] = true, + [0x06A6] = true, [0x06A7] = true, [0x06A8] = true, [0x06A9] = true, + [0x06AA] = true, [0x06AB] = true, [0x06AC] = true, [0x06AD] = true, + [0x06AE] = true, [0x06AF] = true, [0x06B0] = true, [0x06B1] = true, + [0x06B2] = true, [0x06B3] = true, [0x06B4] = true, [0x06B5] = true, + [0x06B6] = true, [0x06B7] = true, [0x06B8] = true, [0x06B9] = true, + [0x06BA] = true, [0x06BB] = true, [0x06BC] = true, [0x06BD] = true, + [0x06BE] = true, [0x06BF] = true, [0x06C1] = true, [0x06C2] = true, + [0x06CC] = true, [0x06CE] = true, [0x06D0] = true, [0x06D1] = true, + [0x06FA] = true, [0x06FB] = true, [0x06FC] = true, [0x06FF] = true, + [0x0750] = true, [0x0751] = true, [0x0752] = true, [0x0753] = true, + [0x0754] = true, [0x0755] = true, [0x0756] = true, [0x0757] = true, + [0x0758] = true, [0x075C] = true, [0x075D] = true, [0x075E] = true, + [0x075F] = true, [0x0760] = true, [0x0761] = true, [0x0762] = true, + [0x0763] = true, [0x0764] = true, [0x0765] = true, [0x0766] = true, + [0x0767] = true, [0x0768] = true, [0x0769] = true, [0x076A] = true, + [0x076D] = true, [0x076E] = true, [0x076F] = true, [0x0770] = true, + [0x0772] = true, [0x0775] = true, [0x0776] = true, [0x0777] = true, + [0x077A] = true, [0x077B] = true, [0x077C] = true, [0x077D] = true, + [0x077E] = true, [0x077F] = true, -otf.defaultnodealternate = "none" -- first last + -- syriac --- we share some vars here, after all, we have no nested lookups and --- less code + [0x0712] = true, [0x0713] = true, [0x0714] = true, [0x071A] = true, + [0x071B] = true, [0x071C] = true, [0x071D] = true, [0x071F] = true, + [0x0720] = true, [0x0721] = true, [0x0722] = true, [0x0723] = true, + [0x0724] = true, [0x0725] = true, [0x0726] = true, [0x0727] = true, + [0x0729] = true, [0x072B] = true, -local tfmdata = false -local characters = false -local descriptions = false -local resources = false -local marks = false -local currentfont = false -local lookuptable = false -local anchorlookups = false -local lookuptypes = false -local handlers = { } -local rlmode = 0 -local featurevalue = false + -- also --- we cannot optimize with "start = first_glyph(head)" because then we don't --- know which rlmode we're in which messes up cursive handling later on --- --- head is always a whatsit so we can safely assume that head is not changed + [zwj] = true, +} --- we use this for special testing and documentation +local arab_warned = { } -local checkstep = (nodes and nodes.tracers and nodes.tracers.steppers.check) or function() end -local registerstep = (nodes and nodes.tracers and nodes.tracers.steppers.register) or function() end -local registermessage = (nodes and nodes.tracers and nodes.tracers.steppers.message) or function() end +-- todo: gref -local function logprocess(...) - if trace_steps then - registermessage(...) +local function warning(current,what) + local char = current.char + if not arab_warned[char] then + log.report("analyze","arab: character %s (U+%05X) has no %s class", char, char, what) + arab_warned[char] = true end - report_direct(...) -end - -local function logwarning(...) - report_direct(...) end -local function gref(n) - if type(n) == "number" then - local description = descriptions[n] - local name = description and description.name - if name then - return format("U+%05X (%s)",n,name) +local function finish(first,last) + if last then + if first == last then + local fc = first.char + if isol_fina_medi_init[fc] or isol_fina[fc] then + set_attribute(first,state,4) -- isol + else + warning(first,"isol") + set_attribute(first,state,0) -- error + end else - return format("U+%05X",n) - end - elseif not n then - return "" - else - local num, nam = { }, { } - for i=1,#n do - local ni = n[i] - if tonumber(ni) then -- later we will start at 2 - local di = descriptions[ni] - num[i] = format("U+%05X",ni) - nam[i] = di and di.name or "?" + local lc = last.char + if isol_fina_medi_init[lc] or isol_fina[lc] then -- why isol here ? + -- if laststate == 1 or laststate == 2 or laststate == 4 then + set_attribute(last,state,3) -- fina + else + warning(last,"fina") + set_attribute(last,state,0) -- error end end - return format("%s (%s)",concat(num," "), concat(nam," ")) + first, last = nil, nil + elseif first then + -- first and last are either both set so we never com here + local fc = first.char + if isol_fina_medi_init[fc] or isol_fina[fc] then + set_attribute(first,state,4) -- isol + else + warning(first,"isol") + set_attribute(first,state,0) -- error + end + first = nil end + return first, last end -local function cref(kind,chainname,chainlookupname,lookupname,index) - if index then - return format("feature %s, chain %s, sub %s, lookup %s, index %s",kind,chainname,chainlookupname,lookupname,index) - elseif lookupname then - return format("feature %s, chain %s, sub %s, lookup %s",kind,chainname or "?",chainlookupname or "?",lookupname) - elseif chainlookupname then - return format("feature %s, chain %s, sub %s",kind,chainname or "?",chainlookupname) - elseif chainname then - return format("feature %s, chain %s",kind,chainname) - else - return format("feature %s",kind) +function methods.arab(head,font,attr) -- maybe make a special version with no trace + local useunicodemarks = analyzers.useunicodemarks + local tfmdata = fontdata[font] + local marks = tfmdata.resources.marks + local first, last, current, done = nil, nil, head, false + while current do + if current.id == glyph_code and current.subtype<256 and current.font == font and not has_attribute(current,state) then + done = true + local char = current.char + if marks[char] or (useunicodemarks and categories[char] == "mn") then + set_attribute(current,state,5) -- mark + elseif isol[char] then -- can be zwj or zwnj too + first, last = finish(first,last) + set_attribute(current,state,4) -- isol + first, last = nil, nil + elseif not first then + if isol_fina_medi_init[char] then + set_attribute(current,state,1) -- init + first, last = first or current, current + elseif isol_fina[char] then + set_attribute(current,state,4) -- isol + first, last = nil, nil + else -- no arab + first, last = finish(first,last) + end + elseif isol_fina_medi_init[char] then + first, last = first or current, current + set_attribute(current,state,2) -- medi + elseif isol_fina[char] then + if not has_attribute(last,state,1) then + -- tricky, we need to check what last may be ! + set_attribute(last,state,2) -- medi + end + set_attribute(current,state,3) -- fina + first, last = nil, nil + elseif char >= 0x0600 and char <= 0x06FF then + set_attribute(current,state,6) -- rest + first, last = finish(first,last) + else --no + first, last = finish(first,last) + end + else + first, last = finish(first,last) + end + current = current.next end + first, last = finish(first,last) + return head, done end -local function pref(kind,lookupname) - return format("feature %s, lookup %s",kind,lookupname) -end +methods.syrc = methods.arab --- we can assume that languages that use marks are not hyphenated --- we can also assume that at most one discretionary is present +directives.register("otf.analyze.useunicodemarks",function(v) + analyzers.useunicodemarks = v +end) -local function markstoligature(kind,lookupname,start,stop,char) - local n = copy_node(start) - local keep = start - local current - current, start = insert_node_after(start,start,n) - local snext = stop.next - current.next = snext - if snext then - snext.prev = current - end - start.prev, stop.next = nil, nil - current.char, current.subtype, current.components = char, ligature_code, start - return keep -end +end -- closure -local function toligature(kind,lookupname,start,stop,char,markflag,discfound) -- brr head - if start == stop then - start.char = char - return start - elseif discfound then - -- print("start->stop",nodes.tosequence(start,stop)) - local components = start.components - if components then - flush_node_list(components) - start.components = nil - end - local lignode = copy_node(start) - lignode.font = start.font - lignode.char = char - lignode.subtype = ligature_code - local next = stop.next - local prev = start.prev - stop.next = nil - start.prev = nil - lignode.components = start - -- print("lignode",nodes.tosequence(lignode)) - -- print("components",nodes.tosequence(lignode.components)) - prev.next = lignode - if next then - next.prev = lignode - end - lignode.next = next - lignode.prev = prev - -- print("start->end",nodes.tosequence(start)) - return lignode - else - -- start is the ligature - local deletemarks = markflag ~= "mark" - local n = copy_node(start) - local current - current, start = insert_node_after(start,start,n) - local snext = stop.next - current.next = snext - if snext then - snext.prev = current - end - start.prev = nil - stop.next = nil - current.char = char - current.subtype = ligature_code - current.components = start - local head = current - -- this is messy ... we should get rid of the components eventually - local i = 0 -- is index of base - while start do - if not marks[start.char] then - i = i + 1 - elseif not deletemarks then -- quite fishy - set_attribute(start,ligacomp,i) - if trace_marks then - logwarning("%s: keep mark %s, gets index %s",pref(kind,lookupname),gref(start.char),i) - end - head, current = insert_node_after(head,current,copy_node(start)) - end - start = start.next - end - start = current.next - while start and start.id == glyph_code do - if marks[start.char] then - set_attribute(start,ligacomp,i) - if trace_marks then - logwarning("%s: keep mark %s, gets index %s",pref(kind,lookupname),gref(start.char),i) - end - else - break - end - start = start.next - end - -- - -- we do need components in funny kerning mode but maybe I can better reconstruct then - -- as we do have the font components info available; removing components makes the - -- previous code much simpler - -- - -- flush_node_list(head.components) - return head - end -end +do -- begin closure to overcome local limits and interference -function handlers.gsub_single(start,kind,lookupname,replacement) - if trace_singles then - logprocess("%s: replacing %s by single %s",pref(kind,lookupname),gref(start.char),gref(replacement)) - end - start.char = replacement - return start, true -end +if not modules then modules = { } end modules ['font-otn'] = { + version = 1.001, + comment = "companion to font-ini.mkiv", + author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", + copyright = "PRAGMA ADE / ConTeXt Development Team", + license = "see context related readme files" +} -local function get_alternative_glyph(start,alternatives,value) - -- needs checking: (global value, brrr) - local choice = nil - local n = #alternatives - local char = start.char - -- - if value == "random" then - local r = random(1,n) - value, choice = format("random, choice %s",r), alternatives[r] - elseif value == "first" then - value, choice = format("first, choice %s",1), alternatives[1] - elseif value == "last" then - value, choice = format("last, choice %s",n), alternatives[n] - else - value = tonumber(value) - if type(value) ~= "number" then - value, choice = "default, choice 1", alternatives[1] - elseif value > n then - local defaultalt = otf.defaultnodealternate - if defaultalt == "first" then - value, choice = format("no %s variants, taking %s",value,n), alternatives[n] - elseif defaultalt == "last" then - value, choice = format("no %s variants, taking %s",value,1), alternatives[1] - else - value, choice = format("no %s variants, ignoring",value), false - end - elseif value == 0 then - value, choice = format("choice %s (no change)",value), char - elseif value < 1 then - value, choice = format("no %s variants, taking %s",value,1), alternatives[1] - else - value, choice = format("choice %s",value), alternatives[value] - end - end - return choice -end +-- this is still somewhat preliminary and it will get better in due time; +-- much functionality could only be implemented thanks to the husayni font +-- of Idris Samawi Hamid to who we dedicate this module. -local function multiple_glyphs(start,multiple) -- marks ? - local nofmultiples = #multiple - if nofmultiples > 0 then - start.char = multiple[1] - if nofmultiples > 1 then - local sn = start.next - for k=2,nofmultiples do -- todo: use insert_node - local n = copy_node(start) - n.char = multiple[k] - n.next = sn - n.prev = start - if sn then - sn.prev = n - end - start.next = n - start = n - end - end - return start, true - else - if trace_multiples then - logprocess("no multiple for %s",gref(start.char)) - end - return start, false - end -end +-- in retrospect it always looks easy but believe it or not, it took a lot +-- of work to get proper open type support done: buggy fonts, fuzzy specs, +-- special made testfonts, many skype sessions between taco, idris and me, +-- torture tests etc etc ... unfortunately the code does not show how much +-- time it took ... -function handlers.gsub_alternate(start,kind,lookupname,alternative,sequence) - local value = featurevalue == true and tfmdata.shared.features[kind] or featurevalue - local choice = get_alternative_glyph(start,alternative,value) - if choice then - if trace_alternatives then - logprocess("%s: replacing %s by alternative %s (%s)",pref(kind,lookupname),gref(char),gref(choice),choice) - end - start.char = choice - else - if trace_alternatives then - logwarning("%s: no variant %s for %s",pref(kind,lookupname),tostring(value),gref(char)) - end - end - return start, true -end +-- todo: +-- +-- kerning is probably not yet ok for latin around dics nodes +-- extension infrastructure (for usage out of context) +-- sorting features according to vendors/renderers +-- alternative loop quitters +-- check cursive and r2l +-- find out where ignore-mark-classes went +-- default features (per language, script) +-- handle positions (we need example fonts) +-- handle gpos_single (we might want an extra width field in glyph nodes because adding kerns might interfere) +-- mark (to mark) code is still not what it should be (too messy but we need some more extreem husayni tests) -function handlers.gsub_multiple(start,kind,lookupname,multiple) - if trace_multiples then - logprocess("%s: replacing %s by multiple %s",pref(kind,lookupname),gref(start.char),gref(multiple)) - end - return multiple_glyphs(start,multiple) -end +--[[ldx-- +

This module is a bit more split up that I'd like but since we also want to test +with plain it has to be so. This module is part of +and discussion about improvements and functionality mostly happens on the + mailing list.

-function handlers.gsub_ligature(start,kind,lookupname,ligature,sequence) - local s, stop, discfound = start.next, nil, false - local startchar = start.char - if marks[startchar] then - while s do - local id = s.id - if id == glyph_code and s.subtype<256 and s.font == currentfont then - local lg = ligature[s.char] - if lg then - stop = s - ligature = lg - s = s.next - else - break - end - else - break - end - end - if stop then - local lig = ligature.ligature - if lig then - if trace_ligatures then - local stopchar = stop.char - start = markstoligature(kind,lookupname,start,stop,lig) - logprocess("%s: replacing %s upto %s by ligature %s",pref(kind,lookupname),gref(startchar),gref(stopchar),gref(start.char)) - else - start = markstoligature(kind,lookupname,start,stop,lig) - end - return start, true - else - -- ok, goto next lookup - end - end - else - local skipmark = sequence.flags[1] - while s do - local id = s.id - if id == glyph_code and s.subtype<256 then - if s.font == currentfont then - local char = s.char - if skipmark and marks[char] then - s = s.next - else - local lg = ligature[char] - if lg then - stop = s - ligature = lg - s = s.next - else - break - end - end - else - break - end - elseif id == disc_code then - discfound = true - s = s.next - else - break - end - end - if stop then - local lig = ligature.ligature - if lig then - if trace_ligatures then - local stopchar = stop.char - start = toligature(kind,lookupname,start,stop,lig,skipmark,discfound) - logprocess("%s: replacing %s upto %s by ligature %s",pref(kind,lookupname),gref(startchar),gref(stopchar),gref(start.char)) - else - start = toligature(kind,lookupname,start,stop,lig,skipmark,discfound) - end - return start, true - else - -- ok, goto next lookup - end - end - end - return start, false -end +

The specification of OpenType is kind of vague. Apart from a lack of a proper +free specifications there's also the problem that Microsoft and Adobe +may have their own interpretation of how and in what order to apply features. +In general the Microsoft website has more detailed specifications and is a +better reference. There is also some information in the FontForge help files.

---[[ldx-- -

We get hits on a mark, but we're not sure if the it has to be applied so -we need to explicitly test for basechar, baselig and basemark entries.

---ldx]]-- +

Because there is so much possible, fonts might contain bugs and/or be made to +work with certain rederers. These may evolve over time which may have the side +effect that suddenly fonts behave differently.

-function handlers.gpos_mark2base(start,kind,lookupname,markanchors,sequence) - local markchar = start.char - if marks[markchar] then - local base = start.prev -- [glyph] [start=mark] - if base and base.id == glyph_code and base.subtype<256 and base.font == currentfont then - local basechar = base.char - if marks[basechar] then - while true do - base = base.prev - if base and base.id == glyph_code and base.subtype<256 and base.font == currentfont then - basechar = base.char - if not marks[basechar] then - break - end - else - if trace_bugs then - logwarning("%s: no base for mark %s",pref(kind,lookupname),gref(markchar)) - end - return start, false - end - end - end - local baseanchors = descriptions[basechar] - if baseanchors then - baseanchors = baseanchors.anchors - end - if baseanchors then - local baseanchors = baseanchors['basechar'] - if baseanchors then - local al = anchorlookups[lookupname] - for anchor,ba in next, baseanchors do - if al[anchor] then - local ma = markanchors[anchor] - if ma then - local dx, dy, bound = setmark(start,base,tfmdata.parameters.factor,rlmode,ba,ma) - if trace_marks then - logprocess("%s, anchor %s, bound %s: anchoring mark %s to basechar %s => (%s,%s)", - pref(kind,lookupname),anchor,bound,gref(markchar),gref(basechar),dx,dy) - end - return start, true - end - end - end - if trace_bugs then - logwarning("%s, no matching anchors for mark %s and base %s",pref(kind,lookupname),gref(markchar),gref(basechar)) - end - end - else -- if trace_bugs then - -- logwarning("%s: char %s is missing in font",pref(kind,lookupname),gref(basechar)) - onetimemessage(currentfont,basechar,"no base anchors",report_fonts) - end - elseif trace_bugs then - logwarning("%s: prev node is no char",pref(kind,lookupname)) - end - elseif trace_bugs then - logwarning("%s: mark %s is no mark",pref(kind,lookupname),gref(markchar)) - end - return start, false -end +

After a lot of experiments (mostly by Taco, me and Idris) we're now at yet another +implementation. Of course all errors are mine and of course the code can be +improved. There are quite some optimizations going on here and processing speed +is currently acceptable. Not all functions are implemented yet, often because I +lack the fonts for testing. Many scripts are not yet supported either, but I will +look into them as soon as users ask for it.

-function handlers.gpos_mark2ligature(start,kind,lookupname,markanchors,sequence) - -- check chainpos variant - local markchar = start.char - if marks[markchar] then - local base = start.prev -- [glyph] [optional marks] [start=mark] - if base and base.id == glyph_code and base.subtype<256 and base.font == currentfont then - local basechar = base.char - if marks[basechar] then - while true do - base = base.prev - if base and base.id == glyph_code and base.subtype<256 and base.font == currentfont then - basechar = base.char - if not marks[basechar] then - break - end - else - if trace_bugs then - logwarning("%s: no base for mark %s",pref(kind,lookupname),gref(markchar)) - end - return start, false - end - end - end - local index = has_attribute(start,ligacomp) - local baseanchors = descriptions[basechar] - if baseanchors then - baseanchors = baseanchors.anchors - if baseanchors then - local baseanchors = baseanchors['baselig'] - if baseanchors then - local al = anchorlookups[lookupname] - for anchor,ba in next, baseanchors do - if al[anchor] then - local ma = markanchors[anchor] - if ma then - ba = ba[index] - if ba then - local dx, dy, bound = setmark(start,base,tfmdata.parameters.factor,rlmode,ba,ma) -- index - if trace_marks then - logprocess("%s, anchor %s, index %s, bound %s: anchoring mark %s to baselig %s at index %s => (%s,%s)", - pref(kind,lookupname),anchor,index,bound,gref(markchar),gref(basechar),index,dx,dy) - end - return start, true - end - end - end - end - if trace_bugs then - logwarning("%s: no matching anchors for mark %s and baselig %s",pref(kind,lookupname),gref(markchar),gref(basechar)) - end - end - end - else -- if trace_bugs then - -- logwarning("%s: char %s is missing in font",pref(kind,lookupname),gref(basechar)) - onetimemessage(currentfont,basechar,"no base anchors",report_fonts) - end - elseif trace_bugs then - logwarning("%s: prev node is no char",pref(kind,lookupname)) - end - elseif trace_bugs then - logwarning("%s: mark %s is no mark",pref(kind,lookupname),gref(markchar)) - end - return start, false -end +

Because there are different interpretations possible, I will extend the code +with more (configureable) variants. I can also add hooks for users so that they can +write their own extensions.

-function handlers.gpos_mark2mark(start,kind,lookupname,markanchors,sequence) - local markchar = start.char - if marks[markchar] then - local base = start.prev -- [glyph] [basemark] [start=mark] - -- while base and has_attribute(base,ligacomp) and has_attribute(base,ligacomp) ~= has_attribute(start,ligacomp) do - -- base = base.prev -- KE: prevents mkmk for marks on different components of a ligature - -- end - local slc = has_attribute(start,ligacomp) - if slc then -- a rather messy loop ... needs checking with husayni - while base do - local blc = has_attribute(base,ligacomp) - if blc and blc ~= slc then - base = base.prev - else - break - end - end - end - if base and base.id == glyph_code and base.subtype<256 and base.font == currentfont then -- subtype test can go - local basechar = base.char - local baseanchors = descriptions[basechar] - if baseanchors then - baseanchors = baseanchors.anchors - if baseanchors then - baseanchors = baseanchors['basemark'] - if baseanchors then - local al = anchorlookups[lookupname] - for anchor,ba in next, baseanchors do - if al[anchor] then - local ma = markanchors[anchor] - if ma then - local dx, dy, bound = setmark(start,base,tfmdata.parameters.factor,rlmode,ba,ma) - if trace_marks then - logprocess("%s, anchor %s, bound %s: anchoring mark %s to basemark %s => (%s,%s)", - pref(kind,lookupname),anchor,bound,gref(markchar),gref(basechar),dx,dy) - end - return start,true - end - end - end - if trace_bugs then - logwarning("%s: no matching anchors for mark %s and basemark %s",pref(kind,lookupname),gref(markchar),gref(basechar)) - end - end - end - else -- if trace_bugs then - -- logwarning("%s: char %s is missing in font",pref(kind,lookupname),gref(basechar)) - onetimemessage(currentfont,basechar,"no base anchors",report_fonts) - end - elseif trace_bugs then - logwarning("%s: prev node is no mark",pref(kind,lookupname)) - end - elseif trace_bugs then - logwarning("%s: mark %s is no mark",pref(kind,lookupname),gref(markchar)) - end - return start,false -end +

Glyphs are indexed not by unicode but in their own way. This is because there is no +relationship with unicode at all, apart from the fact that a font might cover certain +ranges of characters. One character can have multiple shapes. However, at the + end we use unicode so and all extra glyphs are mapped into a private +space. This is needed because we need to access them and has to include +then in the output eventually.

-function handlers.gpos_cursive(start,kind,lookupname,exitanchors,sequence) -- to be checked - local alreadydone = cursonce and has_attribute(start,cursbase) - if not alreadydone then - local done = false - local startchar = start.char - if marks[startchar] then - if trace_cursive then - logprocess("%s: ignoring cursive for mark %s",pref(kind,lookupname),gref(startchar)) - end - else - local nxt = start.next - while not done and nxt and nxt.id == glyph_code and nxt.subtype<256 and nxt.font == currentfont do - local nextchar = nxt.char - if marks[nextchar] then - -- should not happen (maybe warning) - nxt = nxt.next - else - local entryanchors = descriptions[nextchar] - if entryanchors then - entryanchors = entryanchors.anchors - if entryanchors then - entryanchors = entryanchors['centry'] - if entryanchors then - local al = anchorlookups[lookupname] - for anchor, entry in next, entryanchors do - if al[anchor] then - local exit = exitanchors[anchor] - if exit then - local dx, dy, bound = setcursive(start,nxt,tfmdata.parameters.factor,rlmode,exit,entry,characters[startchar],characters[nextchar]) - if trace_cursive then - logprocess("%s: moving %s to %s cursive (%s,%s) using anchor %s and bound %s in rlmode %s",pref(kind,lookupname),gref(startchar),gref(nextchar),dx,dy,anchor,bound,rlmode) - end - done = true - break - end - end - end - end - end - else -- if trace_bugs then - -- logwarning("%s: char %s is missing in font",pref(kind,lookupname),gref(startchar)) - onetimemessage(currentfont,startchar,"no entry anchors",report_fonts) - end - break - end - end - end - return start, done - else - if trace_cursive and trace_details then - logprocess("%s, cursive %s is already done",pref(kind,lookupname),gref(start.char),alreadydone) - end - return start, false - end -end +

The raw table as it coms from gets reorganized in to fit out needs. +In that table is packed (similar tables are shared) and cached on disk +so that successive runs can use the optimized table (after loading the table is +unpacked). The flattening code used later is a prelude to an even more compact table +format (and as such it keeps evolving).

-function handlers.gpos_single(start,kind,lookupname,kerns,sequence) - local startchar = start.char - local dx, dy, w, h = setpair(start,tfmdata.parameters.factor,rlmode,sequence.flags[4],kerns,characters[startchar]) - if trace_kerns then - logprocess("%s: shifting single %s by (%s,%s) and correction (%s,%s)",pref(kind,lookupname),gref(startchar),dx,dy,w,h) - end - return start, false -end +

This module is sparsely documented because it is a moving target. The table format +of the reader changes and we experiment a lot with different methods for supporting +features.

-function handlers.gpos_pair(start,kind,lookupname,kerns,sequence) - -- todo: kerns in disc nodes: pre, post, replace -> loop over disc too - -- todo: kerns in components of ligatures - local snext = start.next - if not snext then - return start, false - else - local prev, done = start, false - local factor = tfmdata.parameters.factor - local lookuptype = lookuptypes[lookupname] - while snext and snext.id == glyph_code and snext.subtype<256 and snext.font == currentfont do - local nextchar = snext.char - local krn = kerns[nextchar] - if not krn and marks[nextchar] then - prev = snext - snext = snext.next - else - local krn = kerns[nextchar] - if not krn then - -- skip - elseif type(krn) == "table" then - if lookuptype == "pair" then -- probably not needed - local a, b = krn[2], krn[3] - if a and #a > 0 then - local startchar = start.char - local x, y, w, h = setpair(start,factor,rlmode,sequence.flags[4],a,characters[startchar]) - if trace_kerns then - logprocess("%s: shifting first of pair %s and %s by (%s,%s) and correction (%s,%s)",pref(kind,lookupname),gref(startchar),gref(nextchar),x,y,w,h) - end - end - if b and #b > 0 then - local startchar = start.char - local x, y, w, h = setpair(snext,factor,rlmode,sequence.flags[4],b,characters[nextchar]) - if trace_kerns then - logprocess("%s: shifting second of pair %s and %s by (%s,%s) and correction (%s,%s)",pref(kind,lookupname),gref(startchar),gref(nextchar),x,y,w,h) - end - end - else -- wrong ... position has different entries - report_process("%s: check this out (old kern stuff)",pref(kind,lookupname)) - -- local a, b = krn[2], krn[6] - -- if a and a ~= 0 then - -- local k = setkern(snext,factor,rlmode,a) - -- if trace_kerns then - -- logprocess("%s: inserting first kern %s between %s and %s",pref(kind,lookupname),k,gref(prev.char),gref(nextchar)) - -- end - -- end - -- if b and b ~= 0 then - -- logwarning("%s: ignoring second kern xoff %s",pref(kind,lookupname),b*factor) - -- end - end - done = true - elseif krn ~= 0 then - local k = setkern(snext,factor,rlmode,krn) - if trace_kerns then - logprocess("%s: inserting kern %s between %s and %s",pref(kind,lookupname),k,gref(prev.char),gref(nextchar)) - end - done = true - end - break - end - end - return start, done - end -end +

As with the code, we may decide to store more information in the + table.

---[[ldx-- -

I will implement multiple chain replacements once I run into a font that uses -it. It's not that complex to handle.

+

Incrementing the version number will force a re-cache. We jump the number by one +when there's a fix in the library or code that +results in different tables.

--ldx]]-- -local chainmores = { } -local chainprocs = { } +-- action handler chainproc chainmore comment +-- +-- gsub_single ok ok ok +-- gsub_multiple ok ok not implemented yet +-- gsub_alternate ok ok not implemented yet +-- gsub_ligature ok ok ok +-- gsub_context ok -- +-- gsub_contextchain ok -- +-- gsub_reversecontextchain ok -- +-- chainsub -- ok +-- reversesub -- ok +-- gpos_mark2base ok ok +-- gpos_mark2ligature ok ok +-- gpos_mark2mark ok ok +-- gpos_cursive ok untested +-- gpos_single ok ok +-- gpos_pair ok ok +-- gpos_context ok -- +-- gpos_contextchain ok -- +-- +-- todo: contextpos and contextsub and class stuff +-- +-- actions: +-- +-- handler : actions triggered by lookup +-- chainproc : actions triggered by contextual lookup +-- chainmore : multiple substitutions triggered by contextual lookup (e.g. fij -> f + ij) +-- +-- remark: the 'not implemented yet' variants will be done when we have fonts that use them +-- remark: we need to check what to do with discretionaries -local function logprocess(...) - if trace_steps then - registermessage(...) - end - report_subchain(...) -end +-- We used to have independent hashes for lookups but as the tags are unique +-- we now use only one hash. If needed we can have multiple again but in that +-- case I will probably prefix (i.e. rename) the lookups in the cached font file. -local logwarning = report_subchain +-- Todo: make plugin feature that operates on char/glyphnode arrays -local function logprocess(...) - if trace_steps then - registermessage(...) - end - report_chain(...) -end +local concat, insert, remove = table.concat, table.insert, table.remove +local format, gmatch, gsub, find, match, lower, strip = string.format, string.gmatch, string.gsub, string.find, string.match, string.lower, string.strip +local type, next, tonumber, tostring = type, next, tonumber, tostring +local lpegmatch = lpeg.match +local random = math.random -local logwarning = report_chain +local logs, trackers, nodes, attributes = logs, trackers, nodes, attributes --- We could share functions but that would lead to extra function calls with many --- arguments, redundant tests and confusing messages. +local registertracker = trackers.register -function chainprocs.chainsub(start,stop,kind,chainname,currentcontext,lookuphash,lookuplist,chainlookupname) - logwarning("%s: a direct call to chainsub cannot happen",cref(kind,chainname,chainlookupname)) - return start, false -end +local fonts = fonts +local otf = fonts.handlers.otf -function chainmores.chainsub(start,stop,kind,chainname,currentcontext,lookuphash,lookuplist,chainlookupname,n) - logprocess("%s: a direct call to chainsub cannot happen",cref(kind,chainname,chainlookupname)) - return start, false -end +local trace_lookups = false registertracker("otf.lookups", function(v) trace_lookups = v end) +local trace_singles = false registertracker("otf.singles", function(v) trace_singles = v end) +local trace_multiples = false registertracker("otf.multiples", function(v) trace_multiples = v end) +local trace_alternatives = false registertracker("otf.alternatives", function(v) trace_alternatives = v end) +local trace_ligatures = false registertracker("otf.ligatures", function(v) trace_ligatures = v end) +local trace_contexts = false registertracker("otf.contexts", function(v) trace_contexts = v end) +local trace_marks = false registertracker("otf.marks", function(v) trace_marks = v end) +local trace_kerns = false registertracker("otf.kerns", function(v) trace_kerns = v end) +local trace_cursive = false registertracker("otf.cursive", function(v) trace_cursive = v end) +local trace_preparing = false registertracker("otf.preparing", function(v) trace_preparing = v end) +local trace_bugs = false registertracker("otf.bugs", function(v) trace_bugs = v end) +local trace_details = false registertracker("otf.details", function(v) trace_details = v end) +local trace_applied = false registertracker("otf.applied", function(v) trace_applied = v end) +local trace_steps = false registertracker("otf.steps", function(v) trace_steps = v end) +local trace_skips = false registertracker("otf.skips", function(v) trace_skips = v end) +local trace_directions = false registertracker("otf.directions", function(v) trace_directions = v end) --- The reversesub is a special case, which is why we need to store the replacements --- in a bit weird way. There is no lookup and the replacement comes from the lookup --- itself. It is meant mostly for dealing with Urdu. +local report_direct = logs.reporter("fonts","otf direct") +local report_subchain = logs.reporter("fonts","otf subchain") +local report_chain = logs.reporter("fonts","otf chain") +local report_process = logs.reporter("fonts","otf process") +local report_prepare = logs.reporter("fonts","otf prepare") +local report_warning = logs.reporter("fonts","otf warning") -function chainprocs.reversesub(start,stop,kind,chainname,currentcontext,lookuphash,replacements) - local char = start.char - local replacement = replacements[char] - if replacement then - if trace_singles then - logprocess("%s: single reverse replacement of %s by %s",cref(kind,chainname),gref(char),gref(replacement)) - end - start.char = replacement - return start, true - else - return start, false - end -end +registertracker("otf.verbose_chain", function(v) otf.setcontextchain(v and "verbose") end) +registertracker("otf.normal_chain", function(v) otf.setcontextchain(v and "normal") end) ---[[ldx-- -

This chain stuff is somewhat tricky since we can have a sequence of actions to be -applied: single, alternate, multiple or ligature where ligature can be an invalid -one in the sense that it will replace multiple by one but not neccessary one that -looks like the combination (i.e. it is the counterpart of multiple then). For -example, the following is valid:

+registertracker("otf.replacements", "otf.singles,otf.multiples,otf.alternatives,otf.ligatures") +registertracker("otf.positions","otf.marks,otf.kerns,otf.cursive") +registertracker("otf.actions","otf.replacements,otf.positions") +registertracker("otf.injections","nodes.injections") - -xxxabcdexxx [single a->A][multiple b->BCD][ligature cde->E] xxxABCDExxx - +registertracker("*otf.sample","otf.steps,otf.actions,otf.analyzing") -

Therefore we we don't really do the replacement here already unless we have the -single lookup case. The efficiency of the replacements can be improved by deleting -as less as needed but that would also make the code even more messy.

---ldx]]-- +local insert_node_after = node.insert_after +local delete_node = nodes.delete +local copy_node = node.copy +local find_node_tail = node.tail or node.slide +local set_attribute = node.set_attribute +local has_attribute = node.has_attribute +local flush_node_list = node.flush_list -local function delete_till_stop(start,stop,ignoremarks) -- keeps start - local n = 1 - if start == stop then - -- done - elseif ignoremarks then - repeat -- start x x m x x stop => start m - local next = start.next - if not marks[next.char] then - delete_node(start,next) - end - n = n + 1 - until next == stop - else -- start x x x stop => start - repeat - local next = start.next - delete_node(start,next) - n = n + 1 - until next == stop - end - return n -end +local setmetatableindex = table.setmetatableindex ---[[ldx-- -

Here we replace start by a single variant, First we delete the rest of the -match.

---ldx]]-- +local zwnj = 0x200C +local zwj = 0x200D +local wildcard = "*" +local default = "dflt" -function chainprocs.gsub_single(start,stop,kind,chainname,currentcontext,lookuphash,currentlookup,chainlookupname,chainindex) - -- todo: marks ? - local current = start - local subtables = currentlookup.subtables - if #subtables > 1 then - logwarning("todo: check if we need to loop over the replacements: %s",concat(subtables," ")) - end - while current do - if current.id == glyph_code then - local currentchar = current.char - local lookupname = subtables[1] -- only 1 - local replacement = lookuphash[lookupname] - if not replacement then - if trace_bugs then - logwarning("%s: no single hits",cref(kind,chainname,chainlookupname,lookupname,chainindex)) - end - else - replacement = replacement[currentchar] - if not replacement then - if trace_bugs then - logwarning("%s: no single for %s",cref(kind,chainname,chainlookupname,lookupname,chainindex),gref(currentchar)) - end - else - if trace_singles then - logprocess("%s: replacing single %s by %s",cref(kind,chainname,chainlookupname,lookupname,chainindex),gref(currentchar),gref(replacement)) - end - current.char = replacement - end - end - return start, true - elseif current == stop then - break - else - current = current.next - end +local nodecodes = nodes.nodecodes +local whatcodes = nodes.whatcodes +local glyphcodes = nodes.glyphcodes + +local glyph_code = nodecodes.glyph +local glue_code = nodecodes.glue +local disc_code = nodecodes.disc +local whatsit_code = nodecodes.whatsit + +local dir_code = whatcodes.dir +local localpar_code = whatcodes.localpar + +local ligature_code = glyphcodes.ligature + +local privateattribute = attributes.private + +-- Something is messed up: we have two mark / ligature indices, one at the injection +-- end and one here ... this is bases in KE's patches but there is something fishy +-- there as I'm pretty sure that for husayni we need some connection (as it's much +-- more complex than an average font) but I need proper examples of all cases, not +-- of only some. + +local state = privateattribute('state') +local markbase = privateattribute('markbase') +local markmark = privateattribute('markmark') +local markdone = privateattribute('markdone') -- assigned at the injection end +local cursbase = privateattribute('cursbase') +local curscurs = privateattribute('curscurs') +local cursdone = privateattribute('cursdone') +local kernpair = privateattribute('kernpair') +local ligacomp = privateattribute('ligacomp') -- assigned here (ideally it should be combined) + +local injections = nodes.injections +local setmark = injections.setmark +local setcursive = injections.setcursive +local setkern = injections.setkern +local setpair = injections.setpair + +local markonce = true +local cursonce = true +local kernonce = true + +local fonthashes = fonts.hashes +local fontdata = fonthashes.identifiers + +local otffeatures = fonts.constructors.newfeatures("otf") +local registerotffeature = otffeatures.register + +local onetimemessage = fonts.loggers.onetimemessage + +otf.defaultnodealternate = "none" -- first last + +-- we share some vars here, after all, we have no nested lookups and +-- less code + +local tfmdata = false +local characters = false +local descriptions = false +local resources = false +local marks = false +local currentfont = false +local lookuptable = false +local anchorlookups = false +local lookuptypes = false +local handlers = { } +local rlmode = 0 +local featurevalue = false + +-- we cannot optimize with "start = first_glyph(head)" because then we don't +-- know which rlmode we're in which messes up cursive handling later on +-- +-- head is always a whatsit so we can safely assume that head is not changed + +-- we use this for special testing and documentation + +local checkstep = (nodes and nodes.tracers and nodes.tracers.steppers.check) or function() end +local registerstep = (nodes and nodes.tracers and nodes.tracers.steppers.register) or function() end +local registermessage = (nodes and nodes.tracers and nodes.tracers.steppers.message) or function() end + +local function logprocess(...) + if trace_steps then + registermessage(...) end - return start, false + report_direct(...) end -chainmores.gsub_single = chainprocs.gsub_single - ---[[ldx-- -

Here we replace start by a sequence of new glyphs. First we delete the rest of -the match.

---ldx]]-- +local function logwarning(...) + report_direct(...) +end -function chainprocs.gsub_multiple(start,stop,kind,chainname,currentcontext,lookuphash,currentlookup,chainlookupname) - delete_till_stop(start,stop) -- we could pass ignoremarks as #3 .. - local startchar = start.char - local subtables = currentlookup.subtables - local lookupname = subtables[1] - local replacements = lookuphash[lookupname] - if not replacements then - if trace_bugs then - logwarning("%s: no multiple hits",cref(kind,chainname,chainlookupname,lookupname)) +local function gref(n) + if type(n) == "number" then + local description = descriptions[n] + local name = description and description.name + if name then + return format("U+%05X (%s)",n,name) + else + return format("U+%05X",n) end + elseif not n then + return "" else - replacements = replacements[startchar] - if not replacements then - if trace_bugs then - logwarning("%s: no multiple for %s",cref(kind,chainname,chainlookupname,lookupname),gref(startchar)) - end - else - if trace_multiples then - logprocess("%s: replacing %s by multiple characters %s",cref(kind,chainname,chainlookupname,lookupname),gref(startchar),gref(replacements)) + local num, nam = { }, { } + for i=1,#n do + local ni = n[i] + if tonumber(ni) then -- later we will start at 2 + local di = descriptions[ni] + num[i] = format("U+%05X",ni) + nam[i] = di and di.name or "?" end - return multiple_glyphs(start,replacements) end + return format("%s (%s)",concat(num," "), concat(nam," ")) end - return start, false end --- function chainmores.gsub_multiple(start,stop,kind,chainname,currentcontext,lookuphash,currentlookup,chainlookupname,n) --- logprocess("%s: gsub_multiple not yet supported",cref(kind,chainname,chainlookupname)) --- return start, false --- end - -chainmores.gsub_multiple = chainprocs.gsub_multiple +local function cref(kind,chainname,chainlookupname,lookupname,index) + if index then + return format("feature %s, chain %s, sub %s, lookup %s, index %s",kind,chainname,chainlookupname,lookupname,index) + elseif lookupname then + return format("feature %s, chain %s, sub %s, lookup %s",kind,chainname or "?",chainlookupname or "?",lookupname) + elseif chainlookupname then + return format("feature %s, chain %s, sub %s",kind,chainname or "?",chainlookupname) + elseif chainname then + return format("feature %s, chain %s",kind,chainname) + else + return format("feature %s",kind) + end +end ---[[ldx-- -

Here we replace start by new glyph. First we delete the rest of the match.

---ldx]]-- +local function pref(kind,lookupname) + return format("feature %s, lookup %s",kind,lookupname) +end --- char_1 mark_1 -> char_x mark_1 (ignore marks) --- char_1 mark_1 -> char_x +-- We can assume that languages that use marks are not hyphenated. We can also assume +-- that at most one discretionary is present. --- to be checked: do we always have just one glyph? --- we can also have alternates for marks --- marks come last anyway --- are there cases where we need to delete the mark +-- We do need components in funny kerning mode but maybe I can better reconstruct then +-- as we do have the font components info available; removing components makes the +-- previous code much simpler. Also, later on copying and freeing becomes easier. +-- However, for arabic we need to keep them around for the sake of mark placement +-- and indices. -function chainprocs.gsub_alternate(start,stop,kind,chainname,currentcontext,lookuphash,currentlookup,chainlookupname) - local current = start - local subtables = currentlookup.subtables - local value = featurevalue == true and tfmdata.shared.features[kind] or featurevalue - while current do - if current.id == glyph_code then -- is this check needed? - local currentchar = current.char - local lookupname = subtables[1] - local alternatives = lookuphash[lookupname] - if not alternatives then - if trace_bugs then - logwarning("%s: no alternative hit",cref(kind,chainname,chainlookupname,lookupname)) - end - else - alternatives = alternatives[currentchar] - if alternatives then - local choice = get_alternative_glyph(current,alternatives,value) - if choice then - if trace_alternatives then - logprocess("%s: replacing %s by alternative %s (%s)",cref(kind,chainname,chainlookupname,lookupname),gref(char),gref(choice),choice) - end - start.char = choice - else - if trace_alternatives then - logwarning("%s: no variant %s for %s",cref(kind,chainname,chainlookupname,lookupname),tostring(value),gref(char)) - end - end - elseif trace_bugs then - logwarning("%s: no alternative for %s",cref(kind,chainname,chainlookupname,lookupname),gref(currentchar)) - end - end - return start, true - elseif current == stop then - break - else - current = current.next - end +local function copy_glyph(g) -- next and prev are untouched ! + local components = g.components + if components then + g.components = nil + local n = copy_node(g) + g.components = components + return n + else + return copy_node(g) end - return start, false end --- function chainmores.gsub_alternate(start,stop,kind,chainname,currentcontext,lookuphash,currentlookup,chainlookupname,n) --- logprocess("%s: gsub_alternate not yet supported",cref(kind,chainname,chainlookupname)) --- return start, false +-- start is a mark and we need to keep that one + +-- local function markstoligature(kind,lookupname,start,stop,char) +-- -- [start]..[stop] +-- local keep = start +-- local prev = start.prev +-- local next = stop.next +-- local base = copy_glyph(start) +-- local current, start = insert_node_after(start,start,base) +-- -- [current][start]..[stop] +-- current.next = next +-- if next then +-- next.prev = current +-- end +-- start.prev = nil +-- stop.next = nil +-- current.char = char +-- current.subtype = ligature_code +-- current.components = start +-- return keep -- end -chainmores.gsub_alternate = chainprocs.gsub_alternate +local function markstoligature(kind,lookupname,start,stop,char) + if start == stop and start.char == char then + return start + else + local prev = start.prev + local next = stop.next + start.prev = nil + stop.next = nil + local base = copy_glyph(start) + base.char = char + base.subtype = ligature_code + base.components = start + if prev then + prev.next = base + end + if next then + next.prev = base + end + base.next = next + base.prev = prev + return base + end +end ---[[ldx-- -

When we replace ligatures we use a helper that handles the marks. I might change -this function (move code inline and handle the marks by a separate function). We -assume rather stupid ligatures (no complex disc nodes).

---ldx]]-- +-- The next code is somewhat complicated by the fact that some fonts can have ligatures made +-- from ligatures that themselves have marks. This was identified by Kai in for instance +-- arabtype: KAF LAM SHADDA ALEF FATHA (0x0643 0x0644 0x0651 0x0627 0x064E). This becomes +-- KAF LAM-ALEF with a SHADDA on the first and a FATHA op de second component. In a next +-- iteration this becomes a KAF-LAM-ALEF with a SHADDA on the second and a FATHA on the +-- third component. -function chainprocs.gsub_ligature(start,stop,kind,chainname,currentcontext,lookuphash,currentlookup,chainlookupname,chainindex) - local startchar = start.char - local subtables = currentlookup.subtables - local lookupname = subtables[1] - local ligatures = lookuphash[lookupname] - if not ligatures then - if trace_bugs then - logwarning("%s: no ligature hits",cref(kind,chainname,chainlookupname,lookupname,chainindex)) - end +local function getcomponentindex(start) + if start.id ~= glyph_code then + return 0 + elseif start.subtype == ligature_code then + local i = 0 + local components = start.components + while components do + i = i + getcomponentindex(components) + components = components.next + end + return i + elseif not marks[start.char] then + return 1 else - ligatures = ligatures[startchar] - if not ligatures then - if trace_bugs then - logwarning("%s: no ligatures starting with %s",cref(kind,chainname,chainlookupname,lookupname,chainindex),gref(startchar)) - end - else - local s = start.next - local discfound = false - local last = stop - local nofreplacements = 0 - local skipmark = currentlookup.flags[1] - while s do - local id = s.id - if id == disc_code then + return 0 + end +end + +-- local function toligature(kind,lookupname,start,stop,char,markflag,discfound) -- brr head +-- if start == stop and start.char == char then +-- start.char = char +-- return start +-- elseif discfound then +-- local prev = start.prev +-- local next = stop.next +-- start.prev = nil +-- stop.next = nil +-- local base = copy_glyph(start) +-- base.char = char +-- base.subtype = ligature_code +-- base.components = start -- start can have components +-- if prev then +-- prev.next = base +-- end +-- if next then +-- next.prev = base +-- end +-- base.next = next +-- base.prev = prev +-- return base +-- else +-- -- start is the ligature +-- local deletemarks = markflag ~= "mark" +-- local prev = start.prev +-- local next = stop.next +-- local base = copy_glyph(start) +-- local current, start = insert_node_after(start,start,base) +-- -- [start->current][copyofstart->start]...[stop] +-- current.next = next +-- if next then +-- next.prev = current +-- end +-- start.prev = nil +-- stop.next = nil +-- current.char = char +-- current.subtype = ligature_code +-- current.components = start +-- local head = current +-- -- this is messy ... we should get rid of the components eventually +-- local baseindex = 0 +-- local componentindex = 0 +-- while start do +-- local char = start.char +-- if not marks[char] then +-- baseindex = baseindex + componentindex +-- componentindex = getcomponentindex(start) +-- elseif not deletemarks then -- quite fishy +-- set_attribute(start,ligacomp,baseindex + (has_attribute(start,ligacomp) or componentindex)) +-- if trace_marks then +-- logwarning("%s: keep mark %s, gets index %s",pref(kind,lookupname),gref(char),has_attribute(start,ligacomp)) +-- end +-- head, current = insert_node_after(head,current,copy_glyph(start)) -- unlikely that mark has components +-- end +-- start = start.next +-- end +-- start = current.next +-- while start and start.id == glyph_code do -- hm, is id test needed ? +-- local char = start.char +-- if marks[char] then +-- set_attribute(start,ligacomp,baseindex + (has_attribute(start,ligacomp) or componentindex)) +-- if trace_marks then +-- logwarning("%s: keep mark %s, gets index %s",pref(kind,lookupname),gref(char),has_attribute(start,ligacomp)) +-- end +-- else +-- break +-- end +-- start = start.next +-- end +-- return head +-- end +-- end + +local function toligature(kind,lookupname,start,stop,char,markflag,discfound) -- brr head + if start == stop and start.char == char then + start.char = char + return start + end + local prev = start.prev + local next = stop.next + start.prev = nil + stop.next = nil + local base = copy_glyph(start) + base.char = char + base.subtype = ligature_code + base.components = start -- start can have components + if prev then + prev.next = base + end + if next then + next.prev = base + end + base.next = next + base.prev = prev + if not discfound then + local deletemarks = markflag ~= "mark" + local components = start + local baseindex = 0 + local componentindex = 0 + local head = base + local current = base + while start do + local char = start.char + if not marks[char] then + baseindex = baseindex + componentindex + componentindex = getcomponentindex(start) + elseif not deletemarks then -- quite fishy + set_attribute(start,ligacomp,baseindex + (has_attribute(start,ligacomp) or componentindex)) + if trace_marks then + logwarning("%s: keep mark %s, gets index %s",pref(kind,lookupname),gref(char),has_attribute(start,ligacomp)) + end + head, current = insert_node_after(head,current,copy_node(start)) -- unlikely that mark has components + end + start = start.next + end + local start = components + while start and start.id == glyph_code do -- hm, is id test needed ? + local char = start.char + if marks[char] then + set_attribute(start,ligacomp,baseindex + (has_attribute(start,ligacomp) or componentindex)) + if trace_marks then + logwarning("%s: keep mark %s, gets index %s",pref(kind,lookupname),gref(char),has_attribute(start,ligacomp)) + end + else + break + end + start = start.next + end + end + return base +end + +function handlers.gsub_single(start,kind,lookupname,replacement) + if trace_singles then + logprocess("%s: replacing %s by single %s",pref(kind,lookupname),gref(start.char),gref(replacement)) + end + start.char = replacement + return start, true +end + +local function get_alternative_glyph(start,alternatives,value) + -- needs checking: (global value, brrr) + local choice = nil + local n = #alternatives + local char = start.char + -- + if value == "random" then + local r = random(1,n) + value, choice = format("random, choice %s",r), alternatives[r] + elseif value == "first" then + value, choice = format("first, choice %s",1), alternatives[1] + elseif value == "last" then + value, choice = format("last, choice %s",n), alternatives[n] + else + value = tonumber(value) + if type(value) ~= "number" then + value, choice = "default, choice 1", alternatives[1] + elseif value > n then + local defaultalt = otf.defaultnodealternate + if defaultalt == "first" then + value, choice = format("no %s variants, taking %s",value,n), alternatives[n] + elseif defaultalt == "last" then + value, choice = format("no %s variants, taking %s",value,1), alternatives[1] + else + value, choice = format("no %s variants, ignoring",value), false + end + elseif value == 0 then + value, choice = format("choice %s (no change)",value), char + elseif value < 1 then + value, choice = format("no %s variants, taking %s",value,1), alternatives[1] + else + value, choice = format("choice %s",value), alternatives[value] + end + end + return choice +end + +local function multiple_glyphs(start,multiple) -- marks ? + local nofmultiples = #multiple + if nofmultiples > 0 then + start.char = multiple[1] + if nofmultiples > 1 then + local sn = start.next + for k=2,nofmultiples do -- todo: use insert_node + local n = copy_node(start) -- ignore components + n.char = multiple[k] + n.next = sn + n.prev = start + if sn then + sn.prev = n + end + start.next = n + start = n + end + end + return start, true + else + if trace_multiples then + logprocess("no multiple for %s",gref(start.char)) + end + return start, false + end +end + +function handlers.gsub_alternate(start,kind,lookupname,alternative,sequence) + local value = featurevalue == true and tfmdata.shared.features[kind] or featurevalue + local choice = get_alternative_glyph(start,alternative,value) + if choice then + if trace_alternatives then + logprocess("%s: replacing %s by alternative %s (%s)",pref(kind,lookupname),gref(start.char),gref(choice),choice) + end + start.char = choice + else + if trace_alternatives then + logwarning("%s: no variant %s for %s",pref(kind,lookupname),tostring(value),gref(start.char)) + end + end + return start, true +end + +function handlers.gsub_multiple(start,kind,lookupname,multiple) + if trace_multiples then + logprocess("%s: replacing %s by multiple %s",pref(kind,lookupname),gref(start.char),gref(multiple)) + end + return multiple_glyphs(start,multiple) +end + +function handlers.gsub_ligature(start,kind,lookupname,ligature,sequence) + local s, stop, discfound = start.next, nil, false + local startchar = start.char + if marks[startchar] then + while s do + local id = s.id + if id == glyph_code and s.subtype<256 and s.font == currentfont then + local lg = ligature[s.char] + if lg then + stop = s + ligature = lg s = s.next - discfound = true else - local schar = s.char - if skipmark and marks[schar] then -- marks + break + end + else + break + end + end + if stop then + local lig = ligature.ligature + if lig then + if trace_ligatures then + local stopchar = stop.char + start = markstoligature(kind,lookupname,start,stop,lig) + logprocess("%s: replacing %s upto %s by ligature %s",pref(kind,lookupname),gref(startchar),gref(stopchar),gref(start.char)) + else + start = markstoligature(kind,lookupname,start,stop,lig) + end + return start, true + else + -- ok, goto next lookup + end + end + else + local skipmark = sequence.flags[1] + while s do + local id = s.id + if id == glyph_code and s.subtype<256 then + if s.font == currentfont then + local char = s.char + if skipmark and marks[char] then s = s.next else - local lg = ligatures[schar] + local lg = ligature[char] if lg then - ligatures, last, nofreplacements = lg, s, nofreplacements + 1 - if s == stop then - break - else - s = s.next - end + stop = s + ligature = lg + s = s.next else break end end + else + break end + elseif id == disc_code then + discfound = true + s = s.next + else + break end - local l2 = ligatures.ligature - if l2 then - if chainindex then - stop = last - end + end + if stop then + local lig = ligature.ligature + if lig then if trace_ligatures then - if start == stop then - logprocess("%s: replacing character %s by ligature %s",cref(kind,chainname,chainlookupname,lookupname,chainindex),gref(startchar),gref(l2)) - else - logprocess("%s: replacing character %s upto %s by ligature %s",cref(kind,chainname,chainlookupname,lookupname,chainindex),gref(startchar),gref(stop.char),gref(l2)) - end - end - start = toligature(kind,lookupname,start,stop,l2,currentlookup.flags[1],discfound) - return start, true, nofreplacements - elseif trace_bugs then - if start == stop then - logwarning("%s: replacing character %s by ligature fails",cref(kind,chainname,chainlookupname,lookupname,chainindex),gref(startchar)) + local stopchar = stop.char + start = toligature(kind,lookupname,start,stop,lig,skipmark,discfound) + logprocess("%s: replacing %s upto %s by ligature %s",pref(kind,lookupname),gref(startchar),gref(stopchar),gref(start.char)) else - logwarning("%s: replacing character %s upto %s by ligature fails",cref(kind,chainname,chainlookupname,lookupname,chainindex),gref(startchar),gref(stop.char)) + start = toligature(kind,lookupname,start,stop,lig,skipmark,discfound) end + return start, true + else + -- ok, goto next lookup end end end - return start, false, 0 + return start, false end -chainmores.gsub_ligature = chainprocs.gsub_ligature +--[[ldx-- +

We get hits on a mark, but we're not sure if the it has to be applied so +we need to explicitly test for basechar, baselig and basemark entries.

+--ldx]]-- -function chainprocs.gpos_mark2base(start,stop,kind,chainname,currentcontext,lookuphash,currentlookup,chainlookupname) +function handlers.gpos_mark2base(start,kind,lookupname,markanchors,sequence) local markchar = start.char if marks[markchar] then - local subtables = currentlookup.subtables - local lookupname = subtables[1] - local markanchors = lookuphash[lookupname] - if markanchors then - markanchors = markanchors[markchar] - end - if markanchors then - local base = start.prev -- [glyph] [start=mark] - if base and base.id == glyph_code and base.subtype<256 and base.font == currentfont then - local basechar = base.char - if marks[basechar] then - while true do - base = base.prev - if base and base.id == glyph_code and base.subtype<256 and base.font == currentfont then - basechar = base.char - if not marks[basechar] then - break - end - else - if trace_bugs then - logwarning("%s: no base for mark %s",pref(kind,lookupname),gref(markchar)) - end - return start, false - end - end + local base = start.prev -- [glyph] [start=mark] + if base and base.id == glyph_code and base.subtype<256 and base.font == currentfont then + local basechar = base.char + if marks[basechar] then + while true do + base = base.prev + if base and base.id == glyph_code and base.subtype<256 and base.font == currentfont then + basechar = base.char + if not marks[basechar] then + break + end + else + if trace_bugs then + logwarning("%s: no base for mark %s",pref(kind,lookupname),gref(markchar)) + end + return start, false + end end - local baseanchors = descriptions[basechar].anchors + end + local baseanchors = descriptions[basechar] + if baseanchors then + baseanchors = baseanchors.anchors + end + if baseanchors then + local baseanchors = baseanchors['basechar'] if baseanchors then - local baseanchors = baseanchors['basechar'] - if baseanchors then - local al = anchorlookups[lookupname] - for anchor,ba in next, baseanchors do - if al[anchor] then - local ma = markanchors[anchor] - if ma then - local dx, dy, bound = setmark(start,base,tfmdata.parameters.factor,rlmode,ba,ma) - if trace_marks then - logprocess("%s, anchor %s, bound %s: anchoring mark %s to basechar %s => (%s,%s)", - cref(kind,chainname,chainlookupname,lookupname),anchor,bound,gref(markchar),gref(basechar),dx,dy) - end - return start, true + local al = anchorlookups[lookupname] + for anchor,ba in next, baseanchors do + if al[anchor] then + local ma = markanchors[anchor] + if ma then + local dx, dy, bound = setmark(start,base,tfmdata.parameters.factor,rlmode,ba,ma) + if trace_marks then + logprocess("%s, anchor %s, bound %s: anchoring mark %s to basechar %s => (%s,%s)", + pref(kind,lookupname),anchor,bound,gref(markchar),gref(basechar),dx,dy) end + return start, true end end - if trace_bugs then - logwarning("%s, no matching anchors for mark %s and base %s",cref(kind,chainname,chainlookupname,lookupname),gref(markchar),gref(basechar)) - end + end + if trace_bugs then + logwarning("%s, no matching anchors for mark %s and base %s",pref(kind,lookupname),gref(markchar),gref(basechar)) end end - elseif trace_bugs then - logwarning("%s: prev node is no char",cref(kind,chainname,chainlookupname,lookupname)) + else -- if trace_bugs then + -- logwarning("%s: char %s is missing in font",pref(kind,lookupname),gref(basechar)) + onetimemessage(currentfont,basechar,"no base anchors",report_fonts) end elseif trace_bugs then - logwarning("%s: mark %s has no anchors",cref(kind,chainname,chainlookupname,lookupname),gref(markchar)) + logwarning("%s: prev node is no char",pref(kind,lookupname)) end elseif trace_bugs then - logwarning("%s: mark %s is no mark",cref(kind,chainname,chainlookupname),gref(markchar)) + logwarning("%s: mark %s is no mark",pref(kind,lookupname),gref(markchar)) end return start, false end -function chainprocs.gpos_mark2ligature(start,stop,kind,chainname,currentcontext,lookuphash,currentlookup,chainlookupname) +function handlers.gpos_mark2ligature(start,kind,lookupname,markanchors,sequence) + -- check chainpos variant local markchar = start.char if marks[markchar] then - local subtables = currentlookup.subtables - local lookupname = subtables[1] - local markanchors = lookuphash[lookupname] - if markanchors then - markanchors = markanchors[markchar] - end - if markanchors then - local base = start.prev -- [glyph] [optional marks] [start=mark] - if base and base.id == glyph_code and base.subtype<256 and base.font == currentfont then - local basechar = base.char - if marks[basechar] then - while true do - base = base.prev - if base and base.id == glyph_code and base.subtype<256 and base.font == currentfont then - basechar = base.char - if not marks[basechar] then - break - end - else - if trace_bugs then - logwarning("%s: no base for mark %s",cref(kind,chainname,chainlookupname,lookupname),markchar) - end - return start, false + local base = start.prev -- [glyph] [optional marks] [start=mark] + if base and base.id == glyph_code and base.subtype<256 and base.font == currentfont then + local basechar = base.char + if marks[basechar] then + while true do + base = base.prev + if base and base.id == glyph_code and base.subtype<256 and base.font == currentfont then + basechar = base.char + if not marks[basechar] then + break + end + else + if trace_bugs then + logwarning("%s: no base for mark %s",pref(kind,lookupname),gref(markchar)) end + return start, false end end - -- todo: like marks a ligatures hash - local index = has_attribute(start,ligacomp) - local baseanchors = descriptions[basechar].anchors + end + local index = has_attribute(start,ligacomp) + local baseanchors = descriptions[basechar] + if baseanchors then + baseanchors = baseanchors.anchors if baseanchors then local baseanchors = baseanchors['baselig'] if baseanchors then @@ -9913,8 +10003,8 @@ function chainprocs.gpos_mark2ligature(start,stop,kind,chainname,currentcontext, if ba then local dx, dy, bound = setmark(start,base,tfmdata.parameters.factor,rlmode,ba,ma) -- index if trace_marks then - logprocess("%s, anchor %s, bound %s: anchoring mark %s to baselig %s at index %s => (%s,%s)", - cref(kind,chainname,chainlookupname,lookupname),anchor,a or bound,gref(markchar),gref(basechar),index,dx,dy) + logprocess("%s, anchor %s, index %s, bound %s: anchoring mark %s to baselig %s at index %s => (%s,%s)", + pref(kind,lookupname),anchor,index,bound,gref(markchar),gref(basechar),index,dx,dy) end return start, true end @@ -9922,2219 +10012,2506 @@ function chainprocs.gpos_mark2ligature(start,stop,kind,chainname,currentcontext, end end if trace_bugs then - logwarning("%s: no matching anchors for mark %s and baselig %s",cref(kind,chainname,chainlookupname,lookupname),gref(markchar),gref(basechar)) + logwarning("%s: no matching anchors for mark %s and baselig %s",pref(kind,lookupname),gref(markchar),gref(basechar)) end end end - elseif trace_bugs then - logwarning("feature %s, lookup %s: prev node is no char",kind,lookupname) + else -- if trace_bugs then + -- logwarning("%s: char %s is missing in font",pref(kind,lookupname),gref(basechar)) + onetimemessage(currentfont,basechar,"no base anchors",report_fonts) end elseif trace_bugs then - logwarning("%s: mark %s has no anchors",cref(kind,chainname,chainlookupname,lookupname),gref(markchar)) + logwarning("%s: prev node is no char",pref(kind,lookupname)) end elseif trace_bugs then - logwarning("%s: mark %s is no mark",cref(kind,chainname,chainlookupname),gref(markchar)) + logwarning("%s: mark %s is no mark",pref(kind,lookupname),gref(markchar)) end return start, false end -function chainprocs.gpos_mark2mark(start,stop,kind,chainname,currentcontext,lookuphash,currentlookup,chainlookupname) +function handlers.gpos_mark2mark(start,kind,lookupname,markanchors,sequence) local markchar = start.char if marks[markchar] then ---~ local alreadydone = markonce and has_attribute(start,markmark) ---~ if not alreadydone then - -- local markanchors = descriptions[markchar].anchors markanchors = markanchors and markanchors.mark - local subtables = currentlookup.subtables - local lookupname = subtables[1] - local markanchors = lookuphash[lookupname] - if markanchors then - markanchors = markanchors[markchar] - end - if markanchors then - local base = start.prev -- [glyph] [basemark] [start=mark] - -- while (base and has_attribute(base,ligacomp) and has_attribute(base,ligacomp) ~= has_attribute(start,ligacomp)) do - -- base = base.prev -- KE: prevents mkmk for marks on different components of a ligature - -- end - local slc = has_attribute(start,ligacomp) - if slc then -- a rather messy loop ... needs checking with husayni - while base do - local blc = has_attribute(base,ligacomp) - if blc and blc ~= slc then - base = base.prev - else - break - end - end + local base = start.prev -- [glyph] [basemark] [start=mark] + -- while base and has_attribute(base,ligacomp) and has_attribute(base,ligacomp) ~= has_attribute(start,ligacomp) do + -- base = base.prev -- KE: prevents mkmk for marks on different components of a ligature + -- end + local slc = has_attribute(start,ligacomp) + if slc then -- a rather messy loop ... needs checking with husayni + while base do + local blc = has_attribute(base,ligacomp) + if blc and blc ~= slc then + base = base.prev + else + break end - if base and base.id == glyph_code and base.subtype<256 and base.font == currentfont then -- subtype test can go - local basechar = base.char - local baseanchors = descriptions[basechar].anchors + end + end + if base and base.id == glyph_code and base.subtype<256 and base.font == currentfont then -- subtype test can go + local basechar = base.char + local baseanchors = descriptions[basechar] + if baseanchors then + baseanchors = baseanchors.anchors + if baseanchors then + baseanchors = baseanchors['basemark'] if baseanchors then - baseanchors = baseanchors['basemark'] - if baseanchors then - local al = anchorlookups[lookupname] - for anchor,ba in next, baseanchors do - if al[anchor] then - local ma = markanchors[anchor] - if ma then - local dx, dy, bound = setmark(start,base,tfmdata.parameters.factor,rlmode,ba,ma) - if trace_marks then - logprocess("%s, anchor %s, bound %s: anchoring mark %s to basemark %s => (%s,%s)", - cref(kind,chainname,chainlookupname,lookupname),anchor,bound,gref(markchar),gref(basechar),dx,dy) - end - return start, true + local al = anchorlookups[lookupname] + for anchor,ba in next, baseanchors do + if al[anchor] then + local ma = markanchors[anchor] + if ma then + local dx, dy, bound = setmark(start,base,tfmdata.parameters.factor,rlmode,ba,ma) + if trace_marks then + logprocess("%s, anchor %s, bound %s: anchoring mark %s to basemark %s => (%s,%s)", + pref(kind,lookupname),anchor,bound,gref(markchar),gref(basechar),dx,dy) end + return start,true end end - if trace_bugs then - logwarning("%s: no matching anchors for mark %s and basemark %s",gref(kind,chainname,chainlookupname,lookupname),gref(markchar),gref(basechar)) - end + end + if trace_bugs then + logwarning("%s: no matching anchors for mark %s and basemark %s",pref(kind,lookupname),gref(markchar),gref(basechar)) end end - elseif trace_bugs then - logwarning("%s: prev node is no mark",cref(kind,chainname,chainlookupname,lookupname)) end - elseif trace_bugs then - logwarning("%s: mark %s has no anchors",cref(kind,chainname,chainlookupname,lookupname),gref(markchar)) + else -- if trace_bugs then + -- logwarning("%s: char %s is missing in font",pref(kind,lookupname),gref(basechar)) + onetimemessage(currentfont,basechar,"no base anchors",report_fonts) end ---~ elseif trace_marks and trace_details then ---~ logprocess("%s, mark %s is already bound (n=%s), ignoring mark2mark",pref(kind,lookupname),gref(markchar),alreadydone) ---~ end + elseif trace_bugs then + logwarning("%s: prev node is no mark",pref(kind,lookupname)) + end elseif trace_bugs then - logwarning("%s: mark %s is no mark",cref(kind,chainname,chainlookupname),gref(markchar)) + logwarning("%s: mark %s is no mark",pref(kind,lookupname),gref(markchar)) end - return start, false + return start,false end --- ! ! ! untested ! ! ! - -function chainprocs.gpos_cursive(start,stop,kind,chainname,currentcontext,lookuphash,currentlookup,chainlookupname) +function handlers.gpos_cursive(start,kind,lookupname,exitanchors,sequence) -- to be checked local alreadydone = cursonce and has_attribute(start,cursbase) if not alreadydone then + local done = false local startchar = start.char - local subtables = currentlookup.subtables - local lookupname = subtables[1] - local exitanchors = lookuphash[lookupname] - if exitanchors then - exitanchors = exitanchors[startchar] - end - if exitanchors then - local done = false - if marks[startchar] then - if trace_cursive then - logprocess("%s: ignoring cursive for mark %s",pref(kind,lookupname),gref(startchar)) - end - else - local nxt = start.next - while not done and nxt and nxt.id == glyph_code and nxt.subtype<256 and nxt.font == currentfont do - local nextchar = nxt.char - if marks[nextchar] then - -- should not happen (maybe warning) - nxt = nxt.next - else - local entryanchors = descriptions[nextchar] - if entryanchors then - entryanchors = entryanchors.anchors - if entryanchors then - entryanchors = entryanchors['centry'] - if entryanchors then - local al = anchorlookups[lookupname] - for anchor, entry in next, entryanchors do - if al[anchor] then - local exit = exitanchors[anchor] - if exit then - local dx, dy, bound = setcursive(start,nxt,tfmdata.parameters.factor,rlmode,exit,entry,characters[startchar],characters[nextchar]) - if trace_cursive then - logprocess("%s: moving %s to %s cursive (%s,%s) using anchor %s and bound %s in rlmode %s",pref(kind,lookupname),gref(startchar),gref(nextchar),dx,dy,anchor,bound,rlmode) - end - done = true - break + if marks[startchar] then + if trace_cursive then + logprocess("%s: ignoring cursive for mark %s",pref(kind,lookupname),gref(startchar)) + end + else + local nxt = start.next + while not done and nxt and nxt.id == glyph_code and nxt.subtype<256 and nxt.font == currentfont do + local nextchar = nxt.char + if marks[nextchar] then + -- should not happen (maybe warning) + nxt = nxt.next + else + local entryanchors = descriptions[nextchar] + if entryanchors then + entryanchors = entryanchors.anchors + if entryanchors then + entryanchors = entryanchors['centry'] + if entryanchors then + local al = anchorlookups[lookupname] + for anchor, entry in next, entryanchors do + if al[anchor] then + local exit = exitanchors[anchor] + if exit then + local dx, dy, bound = setcursive(start,nxt,tfmdata.parameters.factor,rlmode,exit,entry,characters[startchar],characters[nextchar]) + if trace_cursive then + logprocess("%s: moving %s to %s cursive (%s,%s) using anchor %s and bound %s in rlmode %s",pref(kind,lookupname),gref(startchar),gref(nextchar),dx,dy,anchor,bound,rlmode) end + done = true + break end end end end - else -- if trace_bugs then - -- logwarning("%s: char %s is missing in font",pref(kind,lookupname),gref(startchar)) - onetimemessage(currentfont,startchar,"no entry anchors",report_fonts) end - break + else -- if trace_bugs then + -- logwarning("%s: char %s is missing in font",pref(kind,lookupname),gref(startchar)) + onetimemessage(currentfont,startchar,"no entry anchors",report_fonts) end + break end end - return start, done - else - if trace_cursive and trace_details then - logprocess("%s, cursive %s is already done",pref(kind,lookupname),gref(start.char),alreadydone) - end - return start, false end + return start, done + else + if trace_cursive and trace_details then + logprocess("%s, cursive %s is already done",pref(kind,lookupname),gref(start.char),alreadydone) + end + return start, false end - return start, false end -function chainprocs.gpos_single(start,stop,kind,chainname,currentcontext,lookuphash,currentlookup,chainlookupname,chainindex,sequence) - -- untested .. needs checking for the new model +function handlers.gpos_single(start,kind,lookupname,kerns,sequence) local startchar = start.char - local subtables = currentlookup.subtables - local lookupname = subtables[1] - local kerns = lookuphash[lookupname] - if kerns then - kerns = kerns[startchar] -- needed ? - if kerns then - local dx, dy, w, h = setpair(start,tfmdata.parameters.factor,rlmode,sequence.flags[4],kerns,characters[startchar]) - if trace_kerns then - logprocess("%s: shifting single %s by (%s,%s) and correction (%s,%s)",cref(kind,chainname,chainlookupname),gref(startchar),dx,dy,w,h) - end - end + local dx, dy, w, h = setpair(start,tfmdata.parameters.factor,rlmode,sequence.flags[4],kerns,characters[startchar]) + if trace_kerns then + logprocess("%s: shifting single %s by (%s,%s) and correction (%s,%s)",pref(kind,lookupname),gref(startchar),dx,dy,w,h) end return start, false end --- when machines become faster i will make a shared function - -function chainprocs.gpos_pair(start,stop,kind,chainname,currentcontext,lookuphash,currentlookup,chainlookupname,chainindex,sequence) --- logwarning("%s: gpos_pair not yet supported",cref(kind,chainname,chainlookupname)) +function handlers.gpos_pair(start,kind,lookupname,kerns,sequence) + -- todo: kerns in disc nodes: pre, post, replace -> loop over disc too + -- todo: kerns in components of ligatures local snext = start.next - if snext then - local startchar = start.char - local subtables = currentlookup.subtables - local lookupname = subtables[1] - local kerns = lookuphash[lookupname] - if kerns then - kerns = kerns[startchar] - if kerns then - local lookuptype = lookuptypes[lookupname] - local prev, done = start, false - local factor = tfmdata.parameters.factor - while snext and snext.id == glyph_code and snext.subtype<256 and snext.font == currentfont do - local nextchar = snext.char - local krn = kerns[nextchar] - if not krn and marks[nextchar] then - prev = snext - snext = snext.next - else - if not krn then - -- skip - elseif type(krn) == "table" then - if lookuptype == "pair" then - local a, b = krn[2], krn[3] - if a and #a > 0 then - local startchar = start.char - local x, y, w, h = setpair(start,factor,rlmode,sequence.flags[4],a,characters[startchar]) - if trace_kerns then - logprocess("%s: shifting first of pair %s and %s by (%s,%s) and correction (%s,%s)",cref(kind,chainname,chainlookupname),gref(startchar),gref(nextchar),x,y,w,h) - end - end - if b and #b > 0 then - local startchar = start.char - local x, y, w, h = setpair(snext,factor,rlmode,sequence.flags[4],b,characters[nextchar]) - if trace_kerns then - logprocess("%s: shifting second of pair %s and %s by (%s,%s) and correction (%s,%s)",cref(kind,chainname,chainlookupname),gref(startchar),gref(nextchar),x,y,w,h) - end - end - else - report_process("%s: check this out (old kern stuff)",cref(kind,chainname,chainlookupname)) - local a, b = krn[2], krn[6] - if a and a ~= 0 then - local k = setkern(snext,factor,rlmode,a) - if trace_kerns then - logprocess("%s: inserting first kern %s between %s and %s",cref(kind,chainname,chainlookupname),k,gref(prev.char),gref(nextchar)) - end - end - if b and b ~= 0 then - logwarning("%s: ignoring second kern xoff %s",cref(kind,chainname,chainlookupname),b*factor) - end + if not snext then + return start, false + else + local prev, done = start, false + local factor = tfmdata.parameters.factor + local lookuptype = lookuptypes[lookupname] + while snext and snext.id == glyph_code and snext.subtype<256 and snext.font == currentfont do + local nextchar = snext.char + local krn = kerns[nextchar] + if not krn and marks[nextchar] then + prev = snext + snext = snext.next + else + local krn = kerns[nextchar] + if not krn then + -- skip + elseif type(krn) == "table" then + if lookuptype == "pair" then -- probably not needed + local a, b = krn[2], krn[3] + if a and #a > 0 then + local startchar = start.char + local x, y, w, h = setpair(start,factor,rlmode,sequence.flags[4],a,characters[startchar]) + if trace_kerns then + logprocess("%s: shifting first of pair %s and %s by (%s,%s) and correction (%s,%s)",pref(kind,lookupname),gref(startchar),gref(nextchar),x,y,w,h) end - done = true - elseif krn ~= 0 then - local k = setkern(snext,factor,rlmode,krn) + end + if b and #b > 0 then + local startchar = start.char + local x, y, w, h = setpair(snext,factor,rlmode,sequence.flags[4],b,characters[nextchar]) if trace_kerns then - logprocess("%s: inserting kern %s between %s and %s",cref(kind,chainname,chainlookupname),k,gref(prev.char),gref(nextchar)) + logprocess("%s: shifting second of pair %s and %s by (%s,%s) and correction (%s,%s)",pref(kind,lookupname),gref(startchar),gref(nextchar),x,y,w,h) end - done = true end - break + else -- wrong ... position has different entries + report_process("%s: check this out (old kern stuff)",pref(kind,lookupname)) + -- local a, b = krn[2], krn[6] + -- if a and a ~= 0 then + -- local k = setkern(snext,factor,rlmode,a) + -- if trace_kerns then + -- logprocess("%s: inserting first kern %s between %s and %s",pref(kind,lookupname),k,gref(prev.char),gref(nextchar)) + -- end + -- end + -- if b and b ~= 0 then + -- logwarning("%s: ignoring second kern xoff %s",pref(kind,lookupname),b*factor) + -- end + end + done = true + elseif krn ~= 0 then + local k = setkern(snext,factor,rlmode,krn) + if trace_kerns then + logprocess("%s: inserting kern %s between %s and %s",pref(kind,lookupname),k,gref(prev.char),gref(nextchar)) end + done = true end - return start, done + break end end + return start, done + end +end + +--[[ldx-- +

I will implement multiple chain replacements once I run into a font that uses +it. It's not that complex to handle.

+--ldx]]-- + +local chainmores = { } +local chainprocs = { } + +local function logprocess(...) + if trace_steps then + registermessage(...) + end + report_subchain(...) +end + +local logwarning = report_subchain + +local function logprocess(...) + if trace_steps then + registermessage(...) end + report_chain(...) +end + +local logwarning = report_chain + +-- We could share functions but that would lead to extra function calls with many +-- arguments, redundant tests and confusing messages. + +function chainprocs.chainsub(start,stop,kind,chainname,currentcontext,lookuphash,lookuplist,chainlookupname) + logwarning("%s: a direct call to chainsub cannot happen",cref(kind,chainname,chainlookupname)) return start, false end --- what pointer to return, spec says stop --- to be discussed ... is bidi changer a space? --- elseif char == zwnj and sequence[n][32] then -- brrr +function chainmores.chainsub(start,stop,kind,chainname,currentcontext,lookuphash,lookuplist,chainlookupname,n) + logprocess("%s: a direct call to chainsub cannot happen",cref(kind,chainname,chainlookupname)) + return start, false +end --- somehow l or f is global --- we don't need to pass the currentcontext, saves a bit --- make a slow variant then can be activated but with more tracing +-- The reversesub is a special case, which is why we need to store the replacements +-- in a bit weird way. There is no lookup and the replacement comes from the lookup +-- itself. It is meant mostly for dealing with Urdu. -local function show_skip(kind,chainname,char,ck,class) - if ck[9] then - logwarning("%s: skipping char %s (%s) in rule %s, lookuptype %s (%s=>%s)",cref(kind,chainname),gref(char),class,ck[1],ck[2],ck[9],ck[10]) +function chainprocs.reversesub(start,stop,kind,chainname,currentcontext,lookuphash,replacements) + local char = start.char + local replacement = replacements[char] + if replacement then + if trace_singles then + logprocess("%s: single reverse replacement of %s by %s",cref(kind,chainname),gref(char),gref(replacement)) + end + start.char = replacement + return start, true else - logwarning("%s: skipping char %s (%s) in rule %s, lookuptype %s",cref(kind,chainname),gref(char),class,ck[1],ck[2]) + return start, false end end -local function normal_handle_contextchain(start,kind,chainname,contexts,sequence,lookuphash) - -- local rule, lookuptype, sequence, f, l, lookups = ck[1], ck[2] ,ck[3], ck[4], ck[5], ck[6] - local flags = sequence.flags - local done = false - local skipmark = flags[1] - local skipligature = flags[2] - local skipbase = flags[3] - local someskip = skipmark or skipligature or skipbase -- could be stored in flags for a fast test (hm, flags could be false !) - local markclass = sequence.markclass -- todo, first we need a proper test - local skipped = false - for k=1,#contexts do - local match = true - local current = start - local last = start - local ck = contexts[k] - local seq = ck[3] - local s = #seq - -- f..l = mid string - if s == 1 then - -- never happens - match = current.id == glyph_code and current.subtype<256 and current.font == currentfont and seq[1][current.char] - else - -- maybe we need a better space check (maybe check for glue or category or combination) - -- we cannot optimize for n=2 because there can be disc nodes - local f, l = ck[4], ck[5] - -- current match - if f == 1 and f == l then -- current only - -- already a hit - -- match = true - else -- before/current/after | before/current | current/after - -- no need to test first hit (to be optimized) - if f == l then -- new, else last out of sync (f is > 1) - -- match = true - else - local n = f + 1 - last = last.next - while n <= l do - if last then - local id = last.id - if id == glyph_code then - if last.subtype<256 and last.font == currentfont then - local char = last.char - local ccd = descriptions[char] - if ccd then - local class = ccd.class - if class == skipmark or class == skipligature or class == skipbase or (markclass and class == "mark" and not markclass[char]) then - skipped = true - if trace_skips then - show_skip(kind,chainname,char,ck,class) - end - last = last.next - elseif seq[n][char] then - if n < l then - last = last.next - end - n = n + 1 - else - match = false - break - end - else - match = false - break - end - else - match = false - break - end - elseif id == disc_code then - last = last.next - else - match = false - break - end - else - match = false - break - end - end - end +--[[ldx-- +

This chain stuff is somewhat tricky since we can have a sequence of actions to be +applied: single, alternate, multiple or ligature where ligature can be an invalid +one in the sense that it will replace multiple by one but not neccessary one that +looks like the combination (i.e. it is the counterpart of multiple then). For +example, the following is valid:

+ + +xxxabcdexxx [single a->A][multiple b->BCD][ligature cde->E] xxxABCDExxx + + +

Therefore we we don't really do the replacement here already unless we have the +single lookup case. The efficiency of the replacements can be improved by deleting +as less as needed but that would also make the code even more messy.

+--ldx]]-- + +local function delete_till_stop(start,stop,ignoremarks) -- keeps start + local n = 1 + if start == stop then + -- done + elseif ignoremarks then + repeat -- start x x m x x stop => start m + local next = start.next + if not marks[next.char] then +local components = next.components +if components then -- probably not needed + flush_node_list(components) +end + delete_node(start,next) end - -- before - if match and f > 1 then - local prev = start.prev - if prev then - local n = f-1 - while n >= 1 do - if prev then - local id = prev.id - if id == glyph_code then - if prev.subtype<256 and prev.font == currentfont then -- normal char - local char = prev.char - local ccd = descriptions[char] - if ccd then - local class = ccd.class - if class == skipmark or class == skipligature or class == skipbase or (markclass and class == "mark" and not markclass[char]) then - skipped = true - if trace_skips then - show_skip(kind,chainname,char,ck,class) - end - elseif seq[n][char] then - n = n -1 - else - match = false - break - end - else - match = false - break - end - else - match = false - break - end - elseif id == disc_code then - -- skip 'm - elseif seq[n][32] then - n = n -1 - else - match = false - break - end - prev = prev.prev - elseif seq[n][32] then -- somehat special, as zapfino can have many preceding spaces - n = n -1 - else - match = false - break - end - end - elseif f == 2 then - match = seq[1][32] - else - for n=f-1,1 do - if not seq[n][32] then - match = false - break - end - end + n = n + 1 + until next == stop + else -- start x x x stop => start + repeat + local next = start.next +local components = next.components +if components then -- probably not needed + flush_node_list(components) +end + delete_node(start,next) + n = n + 1 + until next == stop + end + return n +end + +--[[ldx-- +

Here we replace start by a single variant, First we delete the rest of the +match.

+--ldx]]-- + +function chainprocs.gsub_single(start,stop,kind,chainname,currentcontext,lookuphash,currentlookup,chainlookupname,chainindex) + -- todo: marks ? + local current = start + local subtables = currentlookup.subtables + if #subtables > 1 then + logwarning("todo: check if we need to loop over the replacements: %s",concat(subtables," ")) + end + while current do + if current.id == glyph_code then + local currentchar = current.char + local lookupname = subtables[1] -- only 1 + local replacement = lookuphash[lookupname] + if not replacement then + if trace_bugs then + logwarning("%s: no single hits",cref(kind,chainname,chainlookupname,lookupname,chainindex)) end - end - -- after - if match and s > l then - local current = last and last.next - if current then - -- removed optimization for s-l == 1, we have to deal with marks anyway - local n = l + 1 - while n <= s do - if current then - local id = current.id - if id == glyph_code then - if current.subtype<256 and current.font == currentfont then -- normal char - local char = current.char - local ccd = descriptions[char] - if ccd then - local class = ccd.class - if class == skipmark or class == skipligature or class == skipbase or (markclass and class == "mark" and not markclass[char]) then - skipped = true - if trace_skips then - show_skip(kind,chainname,char,ck,class) - end - elseif seq[n][char] then - n = n + 1 - else - match = false - break - end - else - match = false - break - end - else - match = false - break - end - elseif id == disc_code then - -- skip 'm - elseif seq[n][32] then -- brrr - n = n + 1 - else - match = false - break - end - current = current.next - elseif seq[n][32] then - n = n + 1 - else - match = false - break - end + else + replacement = replacement[currentchar] + if not replacement then + if trace_bugs then + logwarning("%s: no single for %s",cref(kind,chainname,chainlookupname,lookupname,chainindex),gref(currentchar)) end - elseif s-l == 1 then - match = seq[s][32] else - for n=l+1,s do - if not seq[n][32] then - match = false - break - end - end - end - end - end - if match then - -- ck == currentcontext - if trace_contexts then - local rule, lookuptype, f, l = ck[1], ck[2], ck[4], ck[5] - local char = start.char - if ck[9] then - logwarning("%s: rule %s matches at char %s for (%s,%s,%s) chars, lookuptype %s (%s=>%s)", - cref(kind,chainname),rule,gref(char),f-1,l-f+1,s-l,lookuptype,ck[9],ck[10]) - else - logwarning("%s: rule %s matches at char %s for (%s,%s,%s) chars, lookuptype %s", - cref(kind,chainname),rule,gref(char),f-1,l-f+1,s-l,lookuptype) - end - end - local chainlookups = ck[6] - if chainlookups then - local nofchainlookups = #chainlookups - -- we can speed this up if needed - if nofchainlookups == 1 then - local chainlookupname = chainlookups[1] - local chainlookup = lookuptable[chainlookupname] - if chainlookup then - local cp = chainprocs[chainlookup.type] - if cp then - start, done = cp(start,last,kind,chainname,ck,lookuphash,chainlookup,chainlookupname,nil,sequence) - else - logprocess("%s: %s is not yet supported",cref(kind,chainname,chainlookupname),chainlookup.type) - end - else -- shouldn't happen - logprocess("%s is not yet supported",cref(kind,chainname,chainlookupname)) - end - else - local i = 1 - repeat - if skipped then - while true do - local char = start.char - local ccd = descriptions[char] - if ccd then - local class = ccd.class - if class == skipmark or class == skipligature or class == skipbase or (markclass and class == "mark" and not markclass[char]) then - start = start.next - else - break - end - else - break - end - end - end - local chainlookupname = chainlookups[i] - local chainlookup = lookuptable[chainlookupname] -- can be false (n matches, nofchainlookups - end - else - local replacements = ck[7] - if replacements then - start, done = chainprocs.reversesub(start,last,kind,chainname,ck,lookuphash,replacements) -- sequence - else - done = true -- can be meant to be skipped - if trace_contexts then - logprocess("%s: skipping match",cref(kind,chainname)) + if trace_singles then + logprocess("%s: replacing single %s by %s",cref(kind,chainname,chainlookupname,lookupname,chainindex),gref(currentchar),gref(replacement)) end + current.char = replacement end end + return start, true + elseif current == stop then + break + else + current = current.next end end - return start, done + return start, false end --- Because we want to keep this elsewhere (an because speed is less an issue) we --- pass the font id so that the verbose variant can access the relevant helper tables. - -local verbose_handle_contextchain = function(font,...) - logwarning("no verbose handler installed, reverting to 'normal'") - otf.setcontextchain() - return normal_handle_contextchain(...) -end +chainmores.gsub_single = chainprocs.gsub_single -otf.chainhandlers = { - normal = normal_handle_contextchain, - verbose = verbose_handle_contextchain, -} +--[[ldx-- +

Here we replace start by a sequence of new glyphs. First we delete the rest of +the match.

+--ldx]]-- -function otf.setcontextchain(method) - if not method or method == "normal" or not otf.chainhandlers[method] then - if handlers.contextchain then -- no need for a message while making the format - logwarning("installing normal contextchain handler") +function chainprocs.gsub_multiple(start,stop,kind,chainname,currentcontext,lookuphash,currentlookup,chainlookupname) + delete_till_stop(start,stop) -- we could pass ignoremarks as #3 .. + local startchar = start.char + local subtables = currentlookup.subtables + local lookupname = subtables[1] + local replacements = lookuphash[lookupname] + if not replacements then + if trace_bugs then + logwarning("%s: no multiple hits",cref(kind,chainname,chainlookupname,lookupname)) end - handlers.contextchain = normal_handle_contextchain else - logwarning("installing contextchain handler '%s'",method) - local handler = otf.chainhandlers[method] - handlers.contextchain = function(...) - return handler(currentfont,...) -- hm, get rid of ... + replacements = replacements[startchar] + if not replacements then + if trace_bugs then + logwarning("%s: no multiple for %s",cref(kind,chainname,chainlookupname,lookupname),gref(startchar)) + end + else + if trace_multiples then + logprocess("%s: replacing %s by multiple characters %s",cref(kind,chainname,chainlookupname,lookupname),gref(startchar),gref(replacements)) + end + return multiple_glyphs(start,replacements) end end - handlers.gsub_context = handlers.contextchain - handlers.gsub_contextchain = handlers.contextchain - handlers.gsub_reversecontextchain = handlers.contextchain - handlers.gpos_contextchain = handlers.contextchain - handlers.gpos_context = handlers.contextchain -end - -otf.setcontextchain() - -local missing = { } -- we only report once - -local function logprocess(...) - if trace_steps then - registermessage(...) - end - report_process(...) -end - -local logwarning = report_process - -local function report_missing_cache(typ,lookup) - local f = missing[currentfont] if not f then f = { } missing[currentfont] = f end - local t = f[typ] if not t then t = { } f[typ] = t end - if not t[lookup] then - t[lookup] = true - logwarning("missing cache for lookup %s of type %s in font %s (%s)",lookup,typ,currentfont,tfmdata.properties.fullname) - end + return start, false end -local resolved = { } -- we only resolve a font,script,language pair once - --- todo: pass all these 'locals' in a table +-- function chainmores.gsub_multiple(start,stop,kind,chainname,currentcontext,lookuphash,currentlookup,chainlookupname,n) +-- logprocess("%s: gsub_multiple not yet supported",cref(kind,chainname,chainlookupname)) +-- return start, false +-- end -local lookuphashes = { } +chainmores.gsub_multiple = chainprocs.gsub_multiple -setmetatableindex(lookuphashes, function(t,font) - local lookuphash = fontdata[font].resources.lookuphash - if not lookuphash or not next(lookuphash) then - lookuphash = false - end - t[font] = lookuphash - return lookuphash -end) +--[[ldx-- +

Here we replace start by new glyph. First we delete the rest of the match.

+--ldx]]-- --- fonts.hashes.lookups = lookuphashes +-- char_1 mark_1 -> char_x mark_1 (ignore marks) +-- char_1 mark_1 -> char_x -local special_attributes = { - init = 1, - medi = 2, - fina = 3, - isol = 4 -} +-- to be checked: do we always have just one glyph? +-- we can also have alternates for marks +-- marks come last anyway +-- are there cases where we need to delete the mark -local function initialize(sequence,script,language,enabled) - local features = sequence.features - if features then - for kind, scripts in next, features do - local valid = enabled[kind] - if valid then - local languages = scripts[script] or scripts[wildcard] - if languages and (languages[language] or languages[wildcard]) then - return { valid, special_attributes[kind] or false, sequence.chain or 0, kind, sequence } +function chainprocs.gsub_alternate(start,stop,kind,chainname,currentcontext,lookuphash,currentlookup,chainlookupname) + local current = start + local subtables = currentlookup.subtables + local value = featurevalue == true and tfmdata.shared.features[kind] or featurevalue + while current do + if current.id == glyph_code then -- is this check needed? + local currentchar = current.char + local lookupname = subtables[1] + local alternatives = lookuphash[lookupname] + if not alternatives then + if trace_bugs then + logwarning("%s: no alternative hit",cref(kind,chainname,chainlookupname,lookupname)) + end + else + alternatives = alternatives[currentchar] + if alternatives then + local choice = get_alternative_glyph(current,alternatives,value) + if choice then + if trace_alternatives then + logprocess("%s: replacing %s by alternative %s (%s)",cref(kind,chainname,chainlookupname,lookupname),gref(char),gref(choice),choice) + end + start.char = choice + else + if trace_alternatives then + logwarning("%s: no variant %s for %s",cref(kind,chainname,chainlookupname,lookupname),tostring(value),gref(char)) + end + end + elseif trace_bugs then + logwarning("%s: no alternative for %s",cref(kind,chainname,chainlookupname,lookupname),gref(currentchar)) end end + return start, true + elseif current == stop then + break + else + current = current.next end end - return false + return start, false end -function otf.dataset(tfmdata,sequences,font) -- generic variant, overloaded in context - local shared = tfmdata.shared - local properties = tfmdata.properties - local language = properties.language or "dflt" - local script = properties.script or "dflt" - local enabled = shared.features - local res = resolved[font] - if not res then - res = { } - resolved[font] = res - end - local rs = res[script] - if not rs then - rs = { } - res[script] = rs - end - local rl = rs[language] - if not rl then - rl = { } - rs[language] = rl - setmetatableindex(rl, function(t,k) - local v = enabled and initialize(sequences[k],script,language,enabled) - t[k] = v - return v - end) - end - return rl -end - --- elseif id == glue_code then --- if p[5] then -- chain --- local pc = pp[32] --- if pc then --- start, ok = start, false -- p[1](start,kind,p[2],pc,p[3],p[4]) --- if ok then --- done = true --- end --- if start then start = start.next end --- else --- start = start.next --- end --- else --- start = start.next --- end - -local function featuresprocessor(head,font,attr) - - local lookuphash = lookuphashes[font] -- we can also check sequences here - - if not lookuphash then - return head, false - end - - if trace_steps then - checkstep(head) - end - - tfmdata = fontdata[font] - descriptions = tfmdata.descriptions - characters = tfmdata.characters - resources = tfmdata.resources - - marks = resources.marks - anchorlookups = resources.lookup_to_anchor - lookuptable = resources.lookups - lookuptypes = resources.lookuptypes - - currentfont = font - rlmode = 0 - - local sequences = resources.sequences - local done = false - local datasets = otf.dataset(tfmdata,sequences,font,attr) +-- function chainmores.gsub_alternate(start,stop,kind,chainname,currentcontext,lookuphash,currentlookup,chainlookupname,n) +-- logprocess("%s: gsub_alternate not yet supported",cref(kind,chainname,chainlookupname)) +-- return start, false +-- end - local dirstack = { } -- could move outside function +chainmores.gsub_alternate = chainprocs.gsub_alternate - -- We could work on sub start-stop ranges instead but I wonder if there is that - -- much speed gain (experiments showed that it made not much sense) and we need - -- to keep track of directions anyway. Also at some point I want to play with - -- font interactions and then we do need the full sweeps. +--[[ldx-- +

When we replace ligatures we use a helper that handles the marks. I might change +this function (move code inline and handle the marks by a separate function). We +assume rather stupid ligatures (no complex disc nodes).

+--ldx]]-- - for s=1,#sequences do - local dataset = datasets[s] - if dataset then - featurevalue = dataset[1] -- todo: pass to function instead of using a global - if featurevalue then - local sequence = sequences[s] -- also dataset[5] - local rlparmode = 0 - local topstack = 0 - local success = false - local attribute = dataset[2] - local chain = dataset[3] -- sequence.chain or 0 - local typ = sequence.type - local subtables = sequence.subtables - if chain < 0 then - -- this is a limited case, no special treatments like 'init' etc - local handler = handlers[typ] - -- we need to get rid of this slide! probably no longer needed in latest luatex - local start = find_node_tail(head) -- slow (we can store tail because there's always a skip at the end): todo - while start do - local id = start.id - if id == glyph_code then - if start.subtype<256 and start.font == font then - local a = has_attribute(start,0) - if a then - a = a == attr - else - a = true - end - if a then - for i=1,#subtables do - local lookupname = subtables[i] - local lookupcache = lookuphash[lookupname] - if lookupcache then - local lookupmatch = lookupcache[start.char] - if lookupmatch then - start, success = handler(start,dataset[4],lookupname,lookupmatch,sequence,lookuphash,i) - if success then - break - end - end - else - report_missing_cache(typ,lookupname) - end - end - if start then start = start.prev end - else - start = start.prev - end +function chainprocs.gsub_ligature(start,stop,kind,chainname,currentcontext,lookuphash,currentlookup,chainlookupname,chainindex) + local startchar = start.char + local subtables = currentlookup.subtables + local lookupname = subtables[1] + local ligatures = lookuphash[lookupname] + if not ligatures then + if trace_bugs then + logwarning("%s: no ligature hits",cref(kind,chainname,chainlookupname,lookupname,chainindex)) + end + else + ligatures = ligatures[startchar] + if not ligatures then + if trace_bugs then + logwarning("%s: no ligatures starting with %s",cref(kind,chainname,chainlookupname,lookupname,chainindex),gref(startchar)) + end + else + local s = start.next + local discfound = false + local last = stop + local nofreplacements = 0 + local skipmark = currentlookup.flags[1] + while s do + local id = s.id + if id == disc_code then + s = s.next + discfound = true + else + local schar = s.char + if skipmark and marks[schar] then -- marks + s = s.next + else + local lg = ligatures[schar] + if lg then + ligatures, last, nofreplacements = lg, s, nofreplacements + 1 + if s == stop then + break else - start = start.prev + s = s.next end else - start = start.prev + break end end + end + end + local l2 = ligatures.ligature + if l2 then + if chainindex then + stop = last + end + if trace_ligatures then + if start == stop then + logprocess("%s: replacing character %s by ligature %s",cref(kind,chainname,chainlookupname,lookupname,chainindex),gref(startchar),gref(l2)) + else + logprocess("%s: replacing character %s upto %s by ligature %s",cref(kind,chainname,chainlookupname,lookupname,chainindex),gref(startchar),gref(stop.char),gref(l2)) + end + end + start = toligature(kind,lookupname,start,stop,l2,currentlookup.flags[1],discfound) + return start, true, nofreplacements + elseif trace_bugs then + if start == stop then + logwarning("%s: replacing character %s by ligature fails",cref(kind,chainname,chainlookupname,lookupname,chainindex),gref(startchar)) else - local handler = handlers[typ] - local ns = #subtables - local start = head -- local ? - rlmode = 0 -- to be checked ? - if ns == 1 then -- happens often - local lookupname = subtables[1] - local lookupcache = lookuphash[lookupname] - if not lookupcache then -- also check for empty cache - report_missing_cache(typ,lookupname) + logwarning("%s: replacing character %s upto %s by ligature fails",cref(kind,chainname,chainlookupname,lookupname,chainindex),gref(startchar),gref(stop.char)) + end + end + end + end + return start, false, 0 +end + +chainmores.gsub_ligature = chainprocs.gsub_ligature + +function chainprocs.gpos_mark2base(start,stop,kind,chainname,currentcontext,lookuphash,currentlookup,chainlookupname) + local markchar = start.char + if marks[markchar] then + local subtables = currentlookup.subtables + local lookupname = subtables[1] + local markanchors = lookuphash[lookupname] + if markanchors then + markanchors = markanchors[markchar] + end + if markanchors then + local base = start.prev -- [glyph] [start=mark] + if base and base.id == glyph_code and base.subtype<256 and base.font == currentfont then + local basechar = base.char + if marks[basechar] then + while true do + base = base.prev + if base and base.id == glyph_code and base.subtype<256 and base.font == currentfont then + basechar = base.char + if not marks[basechar] then + break + end else - while start do - local id = start.id - if id == glyph_code then - if start.subtype<256 and start.font == font then - local a = has_attribute(start,0) - if a then - a = (a == attr) and (not attribute or has_attribute(start,state,attribute)) - else - a = not attribute or has_attribute(start,state,attribute) - end - if a then - local lookupmatch = lookupcache[start.char] - if lookupmatch then - -- sequence kan weg - local ok - start, ok = handler(start,dataset[4],lookupname,lookupmatch,sequence,lookuphash,1) - if ok then - success = true - end - end - if start then start = start.next end - else - start = start.next - end - else - start = start.next + if trace_bugs then + logwarning("%s: no base for mark %s",pref(kind,lookupname),gref(markchar)) + end + return start, false + end + end + end + local baseanchors = descriptions[basechar].anchors + if baseanchors then + local baseanchors = baseanchors['basechar'] + if baseanchors then + local al = anchorlookups[lookupname] + for anchor,ba in next, baseanchors do + if al[anchor] then + local ma = markanchors[anchor] + if ma then + local dx, dy, bound = setmark(start,base,tfmdata.parameters.factor,rlmode,ba,ma) + if trace_marks then + logprocess("%s, anchor %s, bound %s: anchoring mark %s to basechar %s => (%s,%s)", + cref(kind,chainname,chainlookupname,lookupname),anchor,bound,gref(markchar),gref(basechar),dx,dy) end - elseif id == whatsit_code then -- will be function - local subtype = start.subtype - if subtype == dir_code then - local dir = start.dir - if dir == "+TRT" or dir == "+TLT" then - topstack = topstack + 1 - dirstack[topstack] = dir - elseif dir == "-TRT" or dir == "-TLT" then - topstack = topstack - 1 - end - local newdir = dirstack[topstack] - if newdir == "+TRT" then - rlmode = -1 - elseif newdir == "+TLT" then - rlmode = 1 - else - rlmode = rlparmode - end - if trace_directions then - report_process("directions after txtdir %s: txtdir=%s:%s, parmode=%s, txtmode=%s",dir,topstack,newdir or "unset",rlparmode,rlmode) - end - elseif subtype == localpar_code then - local dir = start.dir - if dir == "TRT" then - rlparmode = -1 - elseif dir == "TLT" then - rlparmode = 1 - else - rlparmode = 0 - end - rlmode = rlparmode - if trace_directions then - report_process("directions after pardir %s: parmode=%s, txtmode=%s",dir,rlparmode,rlmode) - end - end - start = start.next - else - start = start.next + return start, true end end end - else - while start do - local id = start.id - if id == glyph_code then - if start.subtype<256 and start.font == font then - local a = has_attribute(start,0) - if a then - a = (a == attr) and (not attribute or has_attribute(start,state,attribute)) - else - a = not attribute or has_attribute(start,state,attribute) - end - if a then - for i=1,ns do - local lookupname = subtables[i] - local lookupcache = lookuphash[lookupname] - if lookupcache then - local lookupmatch = lookupcache[start.char] - if lookupmatch then - -- we could move all code inline but that makes things even more unreadable - local ok - start, ok = handler(start,dataset[4],lookupname,lookupmatch,sequence,lookuphash,i) - if ok then - success = true - break - end - end - else - report_missing_cache(typ,lookupname) - end + if trace_bugs then + logwarning("%s, no matching anchors for mark %s and base %s",cref(kind,chainname,chainlookupname,lookupname),gref(markchar),gref(basechar)) + end + end + end + elseif trace_bugs then + logwarning("%s: prev node is no char",cref(kind,chainname,chainlookupname,lookupname)) + end + elseif trace_bugs then + logwarning("%s: mark %s has no anchors",cref(kind,chainname,chainlookupname,lookupname),gref(markchar)) + end + elseif trace_bugs then + logwarning("%s: mark %s is no mark",cref(kind,chainname,chainlookupname),gref(markchar)) + end + return start, false +end + +function chainprocs.gpos_mark2ligature(start,stop,kind,chainname,currentcontext,lookuphash,currentlookup,chainlookupname) + local markchar = start.char + if marks[markchar] then + local subtables = currentlookup.subtables + local lookupname = subtables[1] + local markanchors = lookuphash[lookupname] + if markanchors then + markanchors = markanchors[markchar] + end + if markanchors then + local base = start.prev -- [glyph] [optional marks] [start=mark] + if base and base.id == glyph_code and base.subtype<256 and base.font == currentfont then + local basechar = base.char + if marks[basechar] then + while true do + base = base.prev + if base and base.id == glyph_code and base.subtype<256 and base.font == currentfont then + basechar = base.char + if not marks[basechar] then + break + end + else + if trace_bugs then + logwarning("%s: no base for mark %s",cref(kind,chainname,chainlookupname,lookupname),markchar) + end + return start, false + end + end + end + -- todo: like marks a ligatures hash + local index = has_attribute(start,ligacomp) + local baseanchors = descriptions[basechar].anchors + if baseanchors then + local baseanchors = baseanchors['baselig'] + if baseanchors then + local al = anchorlookups[lookupname] + for anchor,ba in next, baseanchors do + if al[anchor] then + local ma = markanchors[anchor] + if ma then + ba = ba[index] + if ba then + local dx, dy, bound = setmark(start,base,tfmdata.parameters.factor,rlmode,ba,ma) -- index + if trace_marks then + logprocess("%s, anchor %s, bound %s: anchoring mark %s to baselig %s at index %s => (%s,%s)", + cref(kind,chainname,chainlookupname,lookupname),anchor,a or bound,gref(markchar),gref(basechar),index,dx,dy) end - if start then start = start.next end - else - start = start.next - end - else - start = start.next - end - elseif id == whatsit_code then - local subtype = start.subtype - if subtype == dir_code then - local dir = start.dir - if dir == "+TRT" or dir == "+TLT" then - topstack = topstack + 1 - dirstack[topstack] = dir - elseif dir == "-TRT" or dir == "-TLT" then - topstack = topstack - 1 - end - local newdir = dirstack[topstack] - if newdir == "+TRT" then - rlmode = -1 - elseif newdir == "+TLT" then - rlmode = 1 - else - rlmode = rlparmode - end - if trace_directions then - report_process("directions after txtdir %s: txtdir=%s:%s, parmode=%s, txtmode=%s",dir,topstack,newdir or "unset",rlparmode,rlmode) - end - elseif subtype == localpar_code then - local dir = start.dir - if dir == "TRT" then - rlparmode = -1 - elseif dir == "TLT" then - rlparmode = 1 - else - rlparmode = 0 - end - rlmode = rlparmode - if trace_directions then - report_process("directions after pardir %s: parmode=%s, txtmode=%s",dir,rlparmode,rlmode) + return start, true end end - start = start.next - else - start = start.next end end + if trace_bugs then + logwarning("%s: no matching anchors for mark %s and baselig %s",cref(kind,chainname,chainlookupname,lookupname),gref(markchar),gref(basechar)) + end end end - if success then - done = true - end - if trace_steps then -- ? - registerstep(head) - end + elseif trace_bugs then + logwarning("feature %s, lookup %s: prev node is no char",kind,lookupname) end + elseif trace_bugs then + logwarning("%s: mark %s has no anchors",cref(kind,chainname,chainlookupname,lookupname),gref(markchar)) end + elseif trace_bugs then + logwarning("%s: mark %s is no mark",cref(kind,chainname,chainlookupname),gref(markchar)) end - return head, done -end - -local function generic(lookupdata,lookupname,unicode,lookuphash) - local target = lookuphash[lookupname] - if target then - target[unicode] = lookupdata - else - lookuphash[lookupname] = { [unicode] = lookupdata } - end + return start, false end -local action = { - - substitution = generic, - multiple = generic, - alternate = generic, - position = generic, - - ligature = function(lookupdata,lookupname,unicode,lookuphash) - local target = lookuphash[lookupname] - if not target then - target = { } - lookuphash[lookupname] = target - end - for i=1,#lookupdata do - local li = lookupdata[i] - local tu = target[li] - if not tu then - tu = { } - target[li] = tu +function chainprocs.gpos_mark2mark(start,stop,kind,chainname,currentcontext,lookuphash,currentlookup,chainlookupname) + local markchar = start.char + if marks[markchar] then +--~ local alreadydone = markonce and has_attribute(start,markmark) +--~ if not alreadydone then + -- local markanchors = descriptions[markchar].anchors markanchors = markanchors and markanchors.mark + local subtables = currentlookup.subtables + local lookupname = subtables[1] + local markanchors = lookuphash[lookupname] + if markanchors then + markanchors = markanchors[markchar] end - target = tu - end - target.ligature = unicode - end, - - pair = function(lookupdata,lookupname,unicode,lookuphash) - local target = lookuphash[lookupname] - if not target then - target = { } - lookuphash[lookupname] = target - end - local others = target[unicode] - local paired = lookupdata[1] - if others then - others[paired] = lookupdata - else - others = { [paired] = lookupdata } - target[unicode] = others - end - end, - -} - -local function prepare_lookups(tfmdata) - - local rawdata = tfmdata.shared.rawdata - local resources = rawdata.resources - local lookuphash = resources.lookuphash - local anchor_to_lookup = resources.anchor_to_lookup - local lookup_to_anchor = resources.lookup_to_anchor - local lookuptypes = resources.lookuptypes - local characters = tfmdata.characters - local descriptions = tfmdata.descriptions - - -- we cannot free the entries in the descriptions as sometimes we access - -- then directly (for instance anchors) ... selectively freeing does save - -- much memory as it's only a reference to a table and the slot in the - -- description hash is not freed anyway - - for unicode, character in next, characters do -- we cannot loop over descriptions ! - - local description = descriptions[unicode] - - if description then - - local lookups = description.slookups - if lookups then - for lookupname, lookupdata in next, lookups do - action[lookuptypes[lookupname]](lookupdata,lookupname,unicode,lookuphash) - end - end - - local lookups = description.mlookups - if lookups then - for lookupname, lookuplist in next, lookups do - local lookuptype = lookuptypes[lookupname] - for l=1,#lookuplist do - local lookupdata = lookuplist[l] - action[lookuptype](lookupdata,lookupname,unicode,lookuphash) - end - end - end - - local list = description.kerns - if list then - for lookup, krn in next, list do -- ref to glyph, saves lookup - local target = lookuphash[lookup] - if target then - target[unicode] = krn - else - lookuphash[lookup] = { [unicode] = krn } + if markanchors then + local base = start.prev -- [glyph] [basemark] [start=mark] + -- while (base and has_attribute(base,ligacomp) and has_attribute(base,ligacomp) ~= has_attribute(start,ligacomp)) do + -- base = base.prev -- KE: prevents mkmk for marks on different components of a ligature + -- end + local slc = has_attribute(start,ligacomp) + if slc then -- a rather messy loop ... needs checking with husayni + while base do + local blc = has_attribute(base,ligacomp) + if blc and blc ~= slc then + base = base.prev + else + break + end end end - end - - local list = description.anchors - if list then - for typ, anchors in next, list do -- types - if typ == "mark" or typ == "cexit" then -- or entry? - for name, anchor in next, anchors do - local lookups = anchor_to_lookup[name] - if lookups then - for lookup, _ in next, lookups do - local target = lookuphash[lookup] - if target then - target[unicode] = anchors - else - lookuphash[lookup] = { [unicode] = anchors } + if base and base.id == glyph_code and base.subtype<256 and base.font == currentfont then -- subtype test can go + local basechar = base.char + local baseanchors = descriptions[basechar].anchors + if baseanchors then + baseanchors = baseanchors['basemark'] + if baseanchors then + local al = anchorlookups[lookupname] + for anchor,ba in next, baseanchors do + if al[anchor] then + local ma = markanchors[anchor] + if ma then + local dx, dy, bound = setmark(start,base,tfmdata.parameters.factor,rlmode,ba,ma) + if trace_marks then + logprocess("%s, anchor %s, bound %s: anchoring mark %s to basemark %s => (%s,%s)", + cref(kind,chainname,chainlookupname,lookupname),anchor,bound,gref(markchar),gref(basechar),dx,dy) + end + return start, true end end end + if trace_bugs then + logwarning("%s: no matching anchors for mark %s and basemark %s",gref(kind,chainname,chainlookupname,lookupname),gref(markchar),gref(basechar)) + end end end + elseif trace_bugs then + logwarning("%s: prev node is no mark",cref(kind,chainname,chainlookupname,lookupname)) end + elseif trace_bugs then + logwarning("%s: mark %s has no anchors",cref(kind,chainname,chainlookupname,lookupname),gref(markchar)) end - - end - - end - -end - -local function split(replacement,original) - local result = { } - for i=1,#replacement do - result[original[i]] = replacement[i] +--~ elseif trace_marks and trace_details then +--~ logprocess("%s, mark %s is already bound (n=%s), ignoring mark2mark",pref(kind,lookupname),gref(markchar),alreadydone) +--~ end + elseif trace_bugs then + logwarning("%s: mark %s is no mark",cref(kind,chainname,chainlookupname),gref(markchar)) end - return result + return start, false end -local valid = { - coverage = { chainsub = true, chainpos = true, contextsub = true }, - reversecoverage = { reversesub = true }, - glyphs = { chainsub = true, chainpos = true }, -} +-- ! ! ! untested ! ! ! -local function prepare_contextchains(tfmdata) - local rawdata = tfmdata.shared.rawdata - local resources = rawdata.resources - local lookuphash = resources.lookuphash - local lookups = rawdata.lookups - if lookups then - for lookupname, lookupdata in next, rawdata.lookups do - local lookuptype = lookupdata.type - if lookuptype then - local rules = lookupdata.rules - if rules then - local format = lookupdata.format - local validformat = valid[format] - if not validformat then - report_prepare("unsupported format %s",format) - elseif not validformat[lookuptype] then - -- todo: dejavu-serif has one (but i need to see what use it has) - report_prepare("unsupported %s %s for %s",format,lookuptype,lookupname) +function chainprocs.gpos_cursive(start,stop,kind,chainname,currentcontext,lookuphash,currentlookup,chainlookupname) + local alreadydone = cursonce and has_attribute(start,cursbase) + if not alreadydone then + local startchar = start.char + local subtables = currentlookup.subtables + local lookupname = subtables[1] + local exitanchors = lookuphash[lookupname] + if exitanchors then + exitanchors = exitanchors[startchar] + end + if exitanchors then + local done = false + if marks[startchar] then + if trace_cursive then + logprocess("%s: ignoring cursive for mark %s",pref(kind,lookupname),gref(startchar)) + end + else + local nxt = start.next + while not done and nxt and nxt.id == glyph_code and nxt.subtype<256 and nxt.font == currentfont do + local nextchar = nxt.char + if marks[nextchar] then + -- should not happen (maybe warning) + nxt = nxt.next else - local contexts = lookuphash[lookupname] - if not contexts then - contexts = { } - lookuphash[lookupname] = contexts - end - local t, nt = { }, 0 - for nofrules=1,#rules do - local rule = rules[nofrules] - local current = rule.current - local before = rule.before - local after = rule.after - local replacements = rule.replacements - local sequence = { } - local nofsequences = 0 - -- Wventually we can store start, stop and sequence in the cached file - -- but then less sharing takes place so best not do that without a lot - -- of profiling so let's forget about it. - if before then - for n=1,#before do - nofsequences = nofsequences + 1 - sequence[nofsequences] = before[n] - end - end - local start = nofsequences + 1 - for n=1,#current do - nofsequences = nofsequences + 1 - sequence[nofsequences] = current[n] - end - local stop = nofsequences - if after then - for n=1,#after do - nofsequences = nofsequences + 1 - sequence[nofsequences] = after[n] - end - end - if sequence[1] then - -- Replacements only happen with reverse lookups as they are single only. We - -- could pack them into current (replacement value instead of true) and then - -- use sequence[start] instead but it's somewhat ugly. - nt = nt + 1 - t[nt] = { nofrules, lookuptype, sequence, start, stop, rule.lookups, replacements } - for unic, _ in next, sequence[start] do - local cu = contexts[unic] - if not cu then - contexts[unic] = t + local entryanchors = descriptions[nextchar] + if entryanchors then + entryanchors = entryanchors.anchors + if entryanchors then + entryanchors = entryanchors['centry'] + if entryanchors then + local al = anchorlookups[lookupname] + for anchor, entry in next, entryanchors do + if al[anchor] then + local exit = exitanchors[anchor] + if exit then + local dx, dy, bound = setcursive(start,nxt,tfmdata.parameters.factor,rlmode,exit,entry,characters[startchar],characters[nextchar]) + if trace_cursive then + logprocess("%s: moving %s to %s cursive (%s,%s) using anchor %s and bound %s in rlmode %s",pref(kind,lookupname),gref(startchar),gref(nextchar),dx,dy,anchor,bound,rlmode) + end + done = true + break + end + end end end end + else -- if trace_bugs then + -- logwarning("%s: char %s is missing in font",pref(kind,lookupname),gref(startchar)) + onetimemessage(currentfont,startchar,"no entry anchors",report_fonts) end + break end - else - -- no rules end - else - report_prepare("missing lookuptype for %s",lookupname) end + return start, done + else + if trace_cursive and trace_details then + logprocess("%s, cursive %s is already done",pref(kind,lookupname),gref(start.char),alreadydone) + end + return start, false end end + return start, false end --- we can consider lookuphash == false (initialized but empty) vs lookuphash == table - -local function featuresinitializer(tfmdata,value) - if true then -- value then - -- beware we need to use the topmost properties table - local rawdata = tfmdata.shared.rawdata - local properties = rawdata.properties - if not properties.initialized then - local starttime = trace_preparing and os.clock() - local resources = rawdata.resources - resources.lookuphash = resources.lookuphash or { } - prepare_contextchains(tfmdata) - prepare_lookups(tfmdata) - properties.initialized = true - if trace_preparing then - report_prepare("preparation time is %0.3f seconds for %s",os.clock()-starttime,tfmdata.properties.fullname or "?") +function chainprocs.gpos_single(start,stop,kind,chainname,currentcontext,lookuphash,currentlookup,chainlookupname,chainindex,sequence) + -- untested .. needs checking for the new model + local startchar = start.char + local subtables = currentlookup.subtables + local lookupname = subtables[1] + local kerns = lookuphash[lookupname] + if kerns then + kerns = kerns[startchar] -- needed ? + if kerns then + local dx, dy, w, h = setpair(start,tfmdata.parameters.factor,rlmode,sequence.flags[4],kerns,characters[startchar]) + if trace_kerns then + logprocess("%s: shifting single %s by (%s,%s) and correction (%s,%s)",cref(kind,chainname,chainlookupname),gref(startchar),dx,dy,w,h) end end end + return start, false end -registerotffeature { - name = "features", - description = "features", - default = true, - initializers = { - position = 1, - node = featuresinitializer, - }, - processors = { - node = featuresprocessor, - } -} +-- when machines become faster i will make a shared function -end -- closure +function chainprocs.gpos_pair(start,stop,kind,chainname,currentcontext,lookuphash,currentlookup,chainlookupname,chainindex,sequence) +-- logwarning("%s: gpos_pair not yet supported",cref(kind,chainname,chainlookupname)) + local snext = start.next + if snext then + local startchar = start.char + local subtables = currentlookup.subtables + local lookupname = subtables[1] + local kerns = lookuphash[lookupname] + if kerns then + kerns = kerns[startchar] + if kerns then + local lookuptype = lookuptypes[lookupname] + local prev, done = start, false + local factor = tfmdata.parameters.factor + while snext and snext.id == glyph_code and snext.subtype<256 and snext.font == currentfont do + local nextchar = snext.char + local krn = kerns[nextchar] + if not krn and marks[nextchar] then + prev = snext + snext = snext.next + else + if not krn then + -- skip + elseif type(krn) == "table" then + if lookuptype == "pair" then + local a, b = krn[2], krn[3] + if a and #a > 0 then + local startchar = start.char + local x, y, w, h = setpair(start,factor,rlmode,sequence.flags[4],a,characters[startchar]) + if trace_kerns then + logprocess("%s: shifting first of pair %s and %s by (%s,%s) and correction (%s,%s)",cref(kind,chainname,chainlookupname),gref(startchar),gref(nextchar),x,y,w,h) + end + end + if b and #b > 0 then + local startchar = start.char + local x, y, w, h = setpair(snext,factor,rlmode,sequence.flags[4],b,characters[nextchar]) + if trace_kerns then + logprocess("%s: shifting second of pair %s and %s by (%s,%s) and correction (%s,%s)",cref(kind,chainname,chainlookupname),gref(startchar),gref(nextchar),x,y,w,h) + end + end + else + report_process("%s: check this out (old kern stuff)",cref(kind,chainname,chainlookupname)) + local a, b = krn[2], krn[6] + if a and a ~= 0 then + local k = setkern(snext,factor,rlmode,a) + if trace_kerns then + logprocess("%s: inserting first kern %s between %s and %s",cref(kind,chainname,chainlookupname),k,gref(prev.char),gref(nextchar)) + end + end + if b and b ~= 0 then + logwarning("%s: ignoring second kern xoff %s",cref(kind,chainname,chainlookupname),b*factor) + end + end + done = true + elseif krn ~= 0 then + local k = setkern(snext,factor,rlmode,krn) + if trace_kerns then + logprocess("%s: inserting kern %s between %s and %s",cref(kind,chainname,chainlookupname),k,gref(prev.char),gref(nextchar)) + end + done = true + end + break + end + end + return start, done + end + end + end + return start, false +end -do -- begin closure to overcome local limits and interference +-- what pointer to return, spec says stop +-- to be discussed ... is bidi changer a space? +-- elseif char == zwnj and sequence[n][32] then -- brrr -if not modules then modules = { } end modules ['luatex-fonts-chr'] = { - version = 1.001, - comment = "companion to luatex-fonts.lua", - author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", - copyright = "PRAGMA ADE / ConTeXt Development Team", - license = "see context related readme files" -} +-- somehow l or f is global +-- we don't need to pass the currentcontext, saves a bit +-- make a slow variant then can be activated but with more tracing -if context then - texio.write_nl("fatal error: this module is not for context") - os.exit() +local function show_skip(kind,chainname,char,ck,class) + if ck[9] then + logwarning("%s: skipping char %s (%s) in rule %s, lookuptype %s (%s=>%s)",cref(kind,chainname),gref(char),class,ck[1],ck[2],ck[9],ck[10]) + else + logwarning("%s: skipping char %s (%s) in rule %s, lookuptype %s",cref(kind,chainname),gref(char),class,ck[1],ck[2]) + end end -characters = characters or { } -characters.categories = { - [0x0300]="mn", - [0x0301]="mn", - [0x0302]="mn", - [0x0303]="mn", - [0x0304]="mn", - [0x0305]="mn", - [0x0306]="mn", - [0x0307]="mn", - [0x0308]="mn", - [0x0309]="mn", - [0x030A]="mn", - [0x030B]="mn", - [0x030C]="mn", - [0x030D]="mn", - [0x030E]="mn", - [0x030F]="mn", - [0x0310]="mn", - [0x0311]="mn", - [0x0312]="mn", - [0x0313]="mn", - [0x0314]="mn", - [0x0315]="mn", - [0x0316]="mn", - [0x0317]="mn", - [0x0318]="mn", - [0x0319]="mn", - [0x031A]="mn", - [0x031B]="mn", - [0x031C]="mn", - [0x031D]="mn", - [0x031E]="mn", - [0x031F]="mn", - [0x0320]="mn", - [0x0321]="mn", - [0x0322]="mn", - [0x0323]="mn", - [0x0324]="mn", - [0x0325]="mn", - [0x0326]="mn", - [0x0327]="mn", - [0x0328]="mn", - [0x0329]="mn", - [0x032A]="mn", - [0x032B]="mn", - [0x032C]="mn", - [0x032D]="mn", - [0x032E]="mn", - [0x032F]="mn", - [0x0330]="mn", - [0x0331]="mn", - [0x0332]="mn", - [0x0333]="mn", - [0x0334]="mn", - [0x0335]="mn", - [0x0336]="mn", - [0x0337]="mn", - [0x0338]="mn", - [0x0339]="mn", - [0x033A]="mn", - [0x033B]="mn", - [0x033C]="mn", - [0x033D]="mn", - [0x033E]="mn", - [0x033F]="mn", - [0x0340]="mn", - [0x0341]="mn", - [0x0342]="mn", - [0x0343]="mn", - [0x0344]="mn", - [0x0345]="mn", - [0x0346]="mn", - [0x0347]="mn", - [0x0348]="mn", - [0x0349]="mn", - [0x034A]="mn", - [0x034B]="mn", - [0x034C]="mn", - [0x034D]="mn", - [0x034E]="mn", - [0x034F]="mn", - [0x0350]="mn", - [0x0351]="mn", - [0x0352]="mn", - [0x0353]="mn", - [0x0354]="mn", - [0x0355]="mn", - [0x0356]="mn", - [0x0357]="mn", - [0x0358]="mn", - [0x0359]="mn", - [0x035A]="mn", - [0x035B]="mn", - [0x035C]="mn", - [0x035D]="mn", - [0x035E]="mn", - [0x035F]="mn", - [0x0360]="mn", - [0x0361]="mn", - [0x0362]="mn", - [0x0363]="mn", - [0x0364]="mn", - [0x0365]="mn", - [0x0366]="mn", - [0x0367]="mn", - [0x0368]="mn", - [0x0369]="mn", - [0x036A]="mn", - [0x036B]="mn", - [0x036C]="mn", - [0x036D]="mn", - [0x036E]="mn", - [0x036F]="mn", - [0x0483]="mn", - [0x0484]="mn", - [0x0485]="mn", - [0x0486]="mn", - [0x0591]="mn", - [0x0592]="mn", - [0x0593]="mn", - [0x0594]="mn", - [0x0595]="mn", - [0x0596]="mn", - [0x0597]="mn", - [0x0598]="mn", - [0x0599]="mn", - [0x059A]="mn", - [0x059B]="mn", - [0x059C]="mn", - [0x059D]="mn", - [0x059E]="mn", - [0x059F]="mn", - [0x05A0]="mn", - [0x05A1]="mn", - [0x05A2]="mn", - [0x05A3]="mn", - [0x05A4]="mn", - [0x05A5]="mn", - [0x05A6]="mn", - [0x05A7]="mn", - [0x05A8]="mn", - [0x05A9]="mn", - [0x05AA]="mn", - [0x05AB]="mn", - [0x05AC]="mn", - [0x05AD]="mn", - [0x05AE]="mn", - [0x05AF]="mn", - [0x05B0]="mn", - [0x05B1]="mn", - [0x05B2]="mn", - [0x05B3]="mn", - [0x05B4]="mn", - [0x05B5]="mn", - [0x05B6]="mn", - [0x05B7]="mn", - [0x05B8]="mn", - [0x05B9]="mn", - [0x05BA]="mn", - [0x05BB]="mn", - [0x05BC]="mn", - [0x05BD]="mn", - [0x05BF]="mn", - [0x05C1]="mn", - [0x05C2]="mn", - [0x05C4]="mn", - [0x05C5]="mn", - [0x05C7]="mn", - [0x0610]="mn", - [0x0611]="mn", - [0x0612]="mn", - [0x0613]="mn", - [0x0614]="mn", - [0x0615]="mn", - [0x064B]="mn", - [0x064C]="mn", - [0x064D]="mn", - [0x064E]="mn", - [0x064F]="mn", - [0x0650]="mn", - [0x0651]="mn", - [0x0652]="mn", - [0x0653]="mn", - [0x0654]="mn", - [0x0655]="mn", - [0x0656]="mn", - [0x0657]="mn", - [0x0658]="mn", - [0x0659]="mn", - [0x065A]="mn", - [0x065B]="mn", - [0x065C]="mn", - [0x065D]="mn", - [0x065E]="mn", - [0x0670]="mn", - [0x06D6]="mn", - [0x06D7]="mn", - [0x06D8]="mn", - [0x06D9]="mn", - [0x06DA]="mn", - [0x06DB]="mn", - [0x06DC]="mn", - [0x06DF]="mn", - [0x06E0]="mn", - [0x06E1]="mn", - [0x06E2]="mn", - [0x06E3]="mn", - [0x06E4]="mn", - [0x06E7]="mn", - [0x06E8]="mn", - [0x06EA]="mn", - [0x06EB]="mn", - [0x06EC]="mn", - [0x06ED]="mn", - [0x0711]="mn", - [0x0730]="mn", - [0x0731]="mn", - [0x0732]="mn", - [0x0733]="mn", - [0x0734]="mn", - [0x0735]="mn", - [0x0736]="mn", - [0x0737]="mn", - [0x0738]="mn", - [0x0739]="mn", - [0x073A]="mn", - [0x073B]="mn", - [0x073C]="mn", - [0x073D]="mn", - [0x073E]="mn", - [0x073F]="mn", - [0x0740]="mn", - [0x0741]="mn", - [0x0742]="mn", - [0x0743]="mn", - [0x0744]="mn", - [0x0745]="mn", - [0x0746]="mn", - [0x0747]="mn", - [0x0748]="mn", - [0x0749]="mn", - [0x074A]="mn", - [0x07A6]="mn", - [0x07A7]="mn", - [0x07A8]="mn", - [0x07A9]="mn", - [0x07AA]="mn", - [0x07AB]="mn", - [0x07AC]="mn", - [0x07AD]="mn", - [0x07AE]="mn", - [0x07AF]="mn", - [0x07B0]="mn", - [0x07EB]="mn", - [0x07EC]="mn", - [0x07ED]="mn", - [0x07EE]="mn", - [0x07EF]="mn", - [0x07F0]="mn", - [0x07F1]="mn", - [0x07F2]="mn", - [0x07F3]="mn", - [0x0901]="mn", - [0x0902]="mn", - [0x093C]="mn", - [0x0941]="mn", - [0x0942]="mn", - [0x0943]="mn", - [0x0944]="mn", - [0x0945]="mn", - [0x0946]="mn", - [0x0947]="mn", - [0x0948]="mn", - [0x094D]="mn", - [0x0951]="mn", - [0x0952]="mn", - [0x0953]="mn", - [0x0954]="mn", - [0x0962]="mn", - [0x0963]="mn", - [0x0981]="mn", - [0x09BC]="mn", - [0x09C1]="mn", - [0x09C2]="mn", - [0x09C3]="mn", - [0x09C4]="mn", - [0x09CD]="mn", - [0x09E2]="mn", - [0x09E3]="mn", - [0x0A01]="mn", - [0x0A02]="mn", - [0x0A3C]="mn", - [0x0A41]="mn", - [0x0A42]="mn", - [0x0A47]="mn", - [0x0A48]="mn", - [0x0A4B]="mn", - [0x0A4C]="mn", - [0x0A4D]="mn", - [0x0A70]="mn", - [0x0A71]="mn", - [0x0A81]="mn", - [0x0A82]="mn", - [0x0ABC]="mn", - [0x0AC1]="mn", - [0x0AC2]="mn", - [0x0AC3]="mn", - [0x0AC4]="mn", - [0x0AC5]="mn", - [0x0AC7]="mn", - [0x0AC8]="mn", - [0x0ACD]="mn", - [0x0AE2]="mn", - [0x0AE3]="mn", - [0x0B01]="mn", - [0x0B3C]="mn", - [0x0B3F]="mn", - [0x0B41]="mn", - [0x0B42]="mn", - [0x0B43]="mn", - [0x0B4D]="mn", - [0x0B56]="mn", - [0x0B82]="mn", - [0x0BC0]="mn", - [0x0BCD]="mn", - [0x0C3E]="mn", - [0x0C3F]="mn", - [0x0C40]="mn", - [0x0C46]="mn", - [0x0C47]="mn", - [0x0C48]="mn", - [0x0C4A]="mn", - [0x0C4B]="mn", - [0x0C4C]="mn", - [0x0C4D]="mn", - [0x0C55]="mn", - [0x0C56]="mn", - [0x0CBC]="mn", - [0x0CBF]="mn", - [0x0CC6]="mn", - [0x0CCC]="mn", - [0x0CCD]="mn", - [0x0CE2]="mn", - [0x0CE3]="mn", - [0x0D41]="mn", - [0x0D42]="mn", - [0x0D43]="mn", - [0x0D4D]="mn", - [0x0DCA]="mn", - [0x0DD2]="mn", - [0x0DD3]="mn", - [0x0DD4]="mn", - [0x0DD6]="mn", - [0x0E31]="mn", - [0x0E34]="mn", - [0x0E35]="mn", - [0x0E36]="mn", - [0x0E37]="mn", - [0x0E38]="mn", - [0x0E39]="mn", - [0x0E3A]="mn", - [0x0E47]="mn", - [0x0E48]="mn", - [0x0E49]="mn", - [0x0E4A]="mn", - [0x0E4B]="mn", - [0x0E4C]="mn", - [0x0E4D]="mn", - [0x0E4E]="mn", - [0x0EB1]="mn", - [0x0EB4]="mn", - [0x0EB5]="mn", - [0x0EB6]="mn", - [0x0EB7]="mn", - [0x0EB8]="mn", - [0x0EB9]="mn", - [0x0EBB]="mn", - [0x0EBC]="mn", - [0x0EC8]="mn", - [0x0EC9]="mn", - [0x0ECA]="mn", - [0x0ECB]="mn", - [0x0ECC]="mn", - [0x0ECD]="mn", - [0x0F18]="mn", - [0x0F19]="mn", - [0x0F35]="mn", - [0x0F37]="mn", - [0x0F39]="mn", - [0x0F71]="mn", - [0x0F72]="mn", - [0x0F73]="mn", - [0x0F74]="mn", - [0x0F75]="mn", - [0x0F76]="mn", - [0x0F77]="mn", - [0x0F78]="mn", - [0x0F79]="mn", - [0x0F7A]="mn", - [0x0F7B]="mn", - [0x0F7C]="mn", - [0x0F7D]="mn", - [0x0F7E]="mn", - [0x0F80]="mn", - [0x0F81]="mn", - [0x0F82]="mn", - [0x0F83]="mn", - [0x0F84]="mn", - [0x0F86]="mn", - [0x0F87]="mn", - [0x0F90]="mn", - [0x0F91]="mn", - [0x0F92]="mn", - [0x0F93]="mn", - [0x0F94]="mn", - [0x0F95]="mn", - [0x0F96]="mn", - [0x0F97]="mn", - [0x0F99]="mn", - [0x0F9A]="mn", - [0x0F9B]="mn", - [0x0F9C]="mn", - [0x0F9D]="mn", - [0x0F9E]="mn", - [0x0F9F]="mn", - [0x0FA0]="mn", - [0x0FA1]="mn", - [0x0FA2]="mn", - [0x0FA3]="mn", - [0x0FA4]="mn", - [0x0FA5]="mn", - [0x0FA6]="mn", - [0x0FA7]="mn", - [0x0FA8]="mn", - [0x0FA9]="mn", - [0x0FAA]="mn", - [0x0FAB]="mn", - [0x0FAC]="mn", - [0x0FAD]="mn", - [0x0FAE]="mn", - [0x0FAF]="mn", - [0x0FB0]="mn", - [0x0FB1]="mn", - [0x0FB2]="mn", - [0x0FB3]="mn", - [0x0FB4]="mn", - [0x0FB5]="mn", - [0x0FB6]="mn", - [0x0FB7]="mn", - [0x0FB8]="mn", - [0x0FB9]="mn", - [0x0FBA]="mn", - [0x0FBB]="mn", - [0x0FBC]="mn", - [0x0FC6]="mn", - [0x102D]="mn", - [0x102E]="mn", - [0x102F]="mn", - [0x1030]="mn", - [0x1032]="mn", - [0x1036]="mn", - [0x1037]="mn", - [0x1039]="mn", - [0x1058]="mn", - [0x1059]="mn", - [0x135F]="mn", - [0x1712]="mn", - [0x1713]="mn", - [0x1714]="mn", - [0x1732]="mn", - [0x1733]="mn", - [0x1734]="mn", - [0x1752]="mn", - [0x1753]="mn", - [0x1772]="mn", - [0x1773]="mn", - [0x17B7]="mn", - [0x17B8]="mn", - [0x17B9]="mn", - [0x17BA]="mn", - [0x17BB]="mn", - [0x17BC]="mn", - [0x17BD]="mn", - [0x17C6]="mn", - [0x17C9]="mn", - [0x17CA]="mn", - [0x17CB]="mn", - [0x17CC]="mn", - [0x17CD]="mn", - [0x17CE]="mn", - [0x17CF]="mn", - [0x17D0]="mn", - [0x17D1]="mn", - [0x17D2]="mn", - [0x17D3]="mn", - [0x17DD]="mn", - [0x180B]="mn", - [0x180C]="mn", - [0x180D]="mn", - [0x18A9]="mn", - [0x1920]="mn", - [0x1921]="mn", - [0x1922]="mn", - [0x1927]="mn", - [0x1928]="mn", - [0x1932]="mn", - [0x1939]="mn", - [0x193A]="mn", - [0x193B]="mn", - [0x1A17]="mn", - [0x1A18]="mn", - [0x1B00]="mn", - [0x1B01]="mn", - [0x1B02]="mn", - [0x1B03]="mn", - [0x1B34]="mn", - [0x1B36]="mn", - [0x1B37]="mn", - [0x1B38]="mn", - [0x1B39]="mn", - [0x1B3A]="mn", - [0x1B3C]="mn", - [0x1B42]="mn", - [0x1B6B]="mn", - [0x1B6C]="mn", - [0x1B6D]="mn", - [0x1B6E]="mn", - [0x1B6F]="mn", - [0x1B70]="mn", - [0x1B71]="mn", - [0x1B72]="mn", - [0x1B73]="mn", - [0x1DC0]="mn", - [0x1DC1]="mn", - [0x1DC2]="mn", - [0x1DC3]="mn", - [0x1DC4]="mn", - [0x1DC5]="mn", - [0x1DC6]="mn", - [0x1DC7]="mn", - [0x1DC8]="mn", - [0x1DC9]="mn", - [0x1DCA]="mn", - [0x1DFE]="mn", - [0x1DFF]="mn", - [0x20D0]="mn", - [0x20D1]="mn", - [0x20D2]="mn", - [0x20D3]="mn", - [0x20D4]="mn", - [0x20D5]="mn", - [0x20D6]="mn", - [0x20D7]="mn", - [0x20D8]="mn", - [0x20D9]="mn", - [0x20DA]="mn", - [0x20DB]="mn", - [0x20DC]="mn", - [0x20E1]="mn", - [0x20E5]="mn", - [0x20E6]="mn", - [0x20E7]="mn", - [0x20E8]="mn", - [0x20E9]="mn", - [0x20EA]="mn", - [0x20EB]="mn", - [0x20EC]="mn", - [0x20ED]="mn", - [0x20EE]="mn", - [0x20EF]="mn", - [0x302A]="mn", - [0x302B]="mn", - [0x302C]="mn", - [0x302D]="mn", - [0x302E]="mn", - [0x302F]="mn", - [0x3099]="mn", - [0x309A]="mn", - [0xA806]="mn", - [0xA80B]="mn", - [0xA825]="mn", - [0xA826]="mn", - [0xFB1E]="mn", - [0xFE00]="mn", - [0xFE01]="mn", - [0xFE02]="mn", - [0xFE03]="mn", - [0xFE04]="mn", - [0xFE05]="mn", - [0xFE06]="mn", - [0xFE07]="mn", - [0xFE08]="mn", - [0xFE09]="mn", - [0xFE0A]="mn", - [0xFE0B]="mn", - [0xFE0C]="mn", - [0xFE0D]="mn", - [0xFE0E]="mn", - [0xFE0F]="mn", - [0xFE20]="mn", - [0xFE21]="mn", - [0xFE22]="mn", - [0xFE23]="mn", +local function normal_handle_contextchain(start,kind,chainname,contexts,sequence,lookuphash) + -- local rule, lookuptype, sequence, f, l, lookups = ck[1], ck[2] ,ck[3], ck[4], ck[5], ck[6] + local flags = sequence.flags + local done = false + local skipmark = flags[1] + local skipligature = flags[2] + local skipbase = flags[3] + local someskip = skipmark or skipligature or skipbase -- could be stored in flags for a fast test (hm, flags could be false !) + local markclass = sequence.markclass -- todo, first we need a proper test + local skipped = false + for k=1,#contexts do + local match = true + local current = start + local last = start + local ck = contexts[k] + local seq = ck[3] + local s = #seq + -- f..l = mid string + if s == 1 then + -- never happens + match = current.id == glyph_code and current.subtype<256 and current.font == currentfont and seq[1][current.char] + else + -- maybe we need a better space check (maybe check for glue or category or combination) + -- we cannot optimize for n=2 because there can be disc nodes + local f, l = ck[4], ck[5] + -- current match + if f == 1 and f == l then -- current only + -- already a hit + -- match = true + else -- before/current/after | before/current | current/after + -- no need to test first hit (to be optimized) + if f == l then -- new, else last out of sync (f is > 1) + -- match = true + else + local n = f + 1 + last = last.next + while n <= l do + if last then + local id = last.id + if id == glyph_code then + if last.subtype<256 and last.font == currentfont then + local char = last.char + local ccd = descriptions[char] + if ccd then + local class = ccd.class + if class == skipmark or class == skipligature or class == skipbase or (markclass and class == "mark" and not markclass[char]) then + skipped = true + if trace_skips then + show_skip(kind,chainname,char,ck,class) + end + last = last.next + elseif seq[n][char] then + if n < l then + last = last.next + end + n = n + 1 + else + match = false + break + end + else + match = false + break + end + else + match = false + break + end + elseif id == disc_code then + last = last.next + else + match = false + break + end + else + match = false + break + end + end + end + end + -- before + if match and f > 1 then + local prev = start.prev + if prev then + local n = f-1 + while n >= 1 do + if prev then + local id = prev.id + if id == glyph_code then + if prev.subtype<256 and prev.font == currentfont then -- normal char + local char = prev.char + local ccd = descriptions[char] + if ccd then + local class = ccd.class + if class == skipmark or class == skipligature or class == skipbase or (markclass and class == "mark" and not markclass[char]) then + skipped = true + if trace_skips then + show_skip(kind,chainname,char,ck,class) + end + elseif seq[n][char] then + n = n -1 + else + match = false + break + end + else + match = false + break + end + else + match = false + break + end + elseif id == disc_code then + -- skip 'm + elseif seq[n][32] then + n = n -1 + else + match = false + break + end + prev = prev.prev + elseif seq[n][32] then -- somewhat special, as zapfino can have many preceding spaces + n = n -1 + else + match = false + break + end + end + elseif f == 2 then + match = seq[1][32] + else + for n=f-1,1 do + if not seq[n][32] then + match = false + break + end + end + end + end + -- after + if match and s > l then + local current = last and last.next + if current then + -- removed optimization for s-l == 1, we have to deal with marks anyway + local n = l + 1 + while n <= s do + if current then + local id = current.id + if id == glyph_code then + if current.subtype<256 and current.font == currentfont then -- normal char + local char = current.char + local ccd = descriptions[char] + if ccd then + local class = ccd.class + if class == skipmark or class == skipligature or class == skipbase or (markclass and class == "mark" and not markclass[char]) then + skipped = true + if trace_skips then + show_skip(kind,chainname,char,ck,class) + end + elseif seq[n][char] then + n = n + 1 + else + match = false + break + end + else + match = false + break + end + else + match = false + break + end + elseif id == disc_code then + -- skip 'm + elseif seq[n][32] then -- brrr + n = n + 1 + else + match = false + break + end + current = current.next + elseif seq[n][32] then + n = n + 1 + else + match = false + break + end + end + elseif s-l == 1 then + match = seq[s][32] + else + for n=l+1,s do + if not seq[n][32] then + match = false + break + end + end + end + end + end + if match then + -- ck == currentcontext + if trace_contexts then + local rule, lookuptype, f, l = ck[1], ck[2], ck[4], ck[5] + local char = start.char + if ck[9] then + logwarning("%s: rule %s matches at char %s for (%s,%s,%s) chars, lookuptype %s (%s=>%s)", + cref(kind,chainname),rule,gref(char),f-1,l-f+1,s-l,lookuptype,ck[9],ck[10]) + else + logwarning("%s: rule %s matches at char %s for (%s,%s,%s) chars, lookuptype %s", + cref(kind,chainname),rule,gref(char),f-1,l-f+1,s-l,lookuptype) + end + end + local chainlookups = ck[6] + if chainlookups then + local nofchainlookups = #chainlookups + -- we can speed this up if needed + if nofchainlookups == 1 then + local chainlookupname = chainlookups[1] + local chainlookup = lookuptable[chainlookupname] + if chainlookup then + local cp = chainprocs[chainlookup.type] + if cp then + start, done = cp(start,last,kind,chainname,ck,lookuphash,chainlookup,chainlookupname,nil,sequence) + else + logprocess("%s: %s is not yet supported",cref(kind,chainname,chainlookupname),chainlookup.type) + end + else -- shouldn't happen + logprocess("%s is not yet supported",cref(kind,chainname,chainlookupname)) + end + else + local i = 1 + repeat + if skipped then + while true do + local char = start.char + local ccd = descriptions[char] + if ccd then + local class = ccd.class + if class == skipmark or class == skipligature or class == skipbase or (markclass and class == "mark" and not markclass[char]) then + start = start.next + else + break + end + else + break + end + end + end + local chainlookupname = chainlookups[i] + local chainlookup = lookuptable[chainlookupname] -- can be false (n matches, nofchainlookups + end + else + local replacements = ck[7] + if replacements then + start, done = chainprocs.reversesub(start,last,kind,chainname,ck,lookuphash,replacements) -- sequence + else + done = true -- can be meant to be skipped + if trace_contexts then + logprocess("%s: skipping match",cref(kind,chainname)) + end + end + end + end + end + return start, done +end + +-- Because we want to keep this elsewhere (an because speed is less an issue) we +-- pass the font id so that the verbose variant can access the relevant helper tables. + +local verbose_handle_contextchain = function(font,...) + logwarning("no verbose handler installed, reverting to 'normal'") + otf.setcontextchain() + return normal_handle_contextchain(...) +end + +otf.chainhandlers = { + normal = normal_handle_contextchain, + verbose = verbose_handle_contextchain, } -end -- closure +function otf.setcontextchain(method) + if not method or method == "normal" or not otf.chainhandlers[method] then + if handlers.contextchain then -- no need for a message while making the format + logwarning("installing normal contextchain handler") + end + handlers.contextchain = normal_handle_contextchain + else + logwarning("installing contextchain handler '%s'",method) + local handler = otf.chainhandlers[method] + handlers.contextchain = function(...) + return handler(currentfont,...) -- hm, get rid of ... + end + end + handlers.gsub_context = handlers.contextchain + handlers.gsub_contextchain = handlers.contextchain + handlers.gsub_reversecontextchain = handlers.contextchain + handlers.gpos_contextchain = handlers.contextchain + handlers.gpos_context = handlers.contextchain +end -do -- begin closure to overcome local limits and interference +otf.setcontextchain() -if not modules then modules = { } end modules ['font-ota'] = { - version = 1.001, - comment = "companion to font-otf.lua (analysing)", - author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", - copyright = "PRAGMA ADE / ConTeXt Development Team", - license = "see context related readme files" -} +local missing = { } -- we only report once --- this might become scrp-*.lua +local function logprocess(...) + if trace_steps then + registermessage(...) + end + report_process(...) +end -local type, tostring, match, format, concat = type, tostring, string.match, string.format, table.concat +local logwarning = report_process -if not trackers then trackers = { register = function() end } end +local function report_missing_cache(typ,lookup) + local f = missing[currentfont] if not f then f = { } missing[currentfont] = f end + local t = f[typ] if not t then t = { } f[typ] = t end + if not t[lookup] then + t[lookup] = true + logwarning("missing cache for lookup %s of type %s in font %s (%s)",lookup,typ,currentfont,tfmdata.properties.fullname) + end +end -local trace_analyzing = false trackers.register("otf.analyzing", function(v) trace_analyzing = v end) +local resolved = { } -- we only resolve a font,script,language pair once + +-- todo: pass all these 'locals' in a table + +local lookuphashes = { } + +setmetatableindex(lookuphashes, function(t,font) + local lookuphash = fontdata[font].resources.lookuphash + if not lookuphash or not next(lookuphash) then + lookuphash = false + end + t[font] = lookuphash + return lookuphash +end) + +-- fonts.hashes.lookups = lookuphashes + +local constants = fonts.analyzers.constants + +local function initialize(sequence,script,language,enabled) + local features = sequence.features + if features then + for kind, scripts in next, features do + local valid = enabled[kind] + if valid then + local languages = scripts[script] or scripts[wildcard] + if languages and (languages[language] or languages[wildcard]) then + return { valid, constants[kind] or false, sequence.chain or 0, kind, sequence } + end + end + end + end + return false +end + +function otf.dataset(tfmdata,font) -- generic variant, overloaded in context + local shared = tfmdata.shared + local properties = tfmdata.properties + local language = properties.language or "dflt" + local script = properties.script or "dflt" + local enabled = shared.features + local res = resolved[font] + if not res then + res = { } + resolved[font] = res + end + local rs = res[script] + if not rs then + rs = { } + res[script] = rs + end + local rl = rs[language] + if not rl then + rl = { + -- indexed but we can also add specific data by key + } + rs[language] = rl + local sequences = tfmdata.resources.sequences + setmetatableindex(rl, function(t,k) + if type(k) == "number" then + local v = enabled and initialize(sequences[k],script,language,enabled) + t[k] = v + return v + end + end) + end + return rl +end + +-- elseif id == glue_code then +-- if p[5] then -- chain +-- local pc = pp[32] +-- if pc then +-- start, ok = start, false -- p[1](start,kind,p[2],pc,p[3],p[4]) +-- if ok then +-- done = true +-- end +-- if start then start = start.next end +-- else +-- start = start.next +-- end +-- else +-- start = start.next +-- end + +-- there will be a new direction parser (pre-parsed etc) + +local function featuresprocessor(head,font,attr) + + local lookuphash = lookuphashes[font] -- we can also check sequences here + + if not lookuphash then + return head, false + end + + if trace_steps then + checkstep(head) + end + + tfmdata = fontdata[font] + descriptions = tfmdata.descriptions + characters = tfmdata.characters + resources = tfmdata.resources + + marks = resources.marks + anchorlookups = resources.lookup_to_anchor + lookuptable = resources.lookups + lookuptypes = resources.lookuptypes + + currentfont = font + rlmode = 0 + + local sequences = resources.sequences + local done = false + local datasets = otf.dataset(tfmdata,font,attr) + + local dirstack = { } -- could move outside function + + -- We could work on sub start-stop ranges instead but I wonder if there is that + -- much speed gain (experiments showed that it made not much sense) and we need + -- to keep track of directions anyway. Also at some point I want to play with + -- font interactions and then we do need the full sweeps. + + -- Keeping track of the headnode is needed for devanagari (I generalized it a bit + -- so that multiple cases are also covered. + + for s=1,#sequences do + local dataset = datasets[s] + if dataset then + featurevalue = dataset[1] -- todo: pass to function instead of using a global + if featurevalue then + local sequence = sequences[s] -- also dataset[5] + local rlparmode = 0 + local topstack = 0 + local success = false + local attribute = dataset[2] + local chain = dataset[3] -- sequence.chain or 0 + local typ = sequence.type + local subtables = sequence.subtables + if chain < 0 then + -- this is a limited case, no special treatments like 'init' etc + local handler = handlers[typ] + -- we need to get rid of this slide! probably no longer needed in latest luatex + local start = find_node_tail(head) -- slow (we can store tail because there's always a skip at the end): todo + while start do + local id = start.id + if id == glyph_code then + if start.subtype<256 and start.font == font then + local a = has_attribute(start,0) + if a then + a = a == attr + else + a = true + end + if a then + for i=1,#subtables do + local lookupname = subtables[i] + local lookupcache = lookuphash[lookupname] + if lookupcache then + local lookupmatch = lookupcache[start.char] + if lookupmatch then + local headnode = start == head + start, success = handler(start,dataset[4],lookupname,lookupmatch,sequence,lookuphash,i) + if success then + if headnode then + head = start + end + break + end + end + else + report_missing_cache(typ,lookupname) + end + end + if start then start = start.prev end + else + start = start.prev + end + else + start = start.prev + end + else + start = start.prev + end + end + else + local handler = handlers[typ] + local ns = #subtables + local start = head -- local ? + rlmode = 0 -- to be checked ? + if ns == 1 then -- happens often + local lookupname = subtables[1] + local lookupcache = lookuphash[lookupname] + if not lookupcache then -- also check for empty cache + report_missing_cache(typ,lookupname) + else + while start do + local id = start.id + if id == glyph_code then + if start.subtype<256 and start.font == font then + local a = has_attribute(start,0) + if a then + a = (a == attr) and (not attribute or has_attribute(start,state,attribute)) + else + a = not attribute or has_attribute(start,state,attribute) + end + if a then + local lookupmatch = lookupcache[start.char] + if lookupmatch then + -- sequence kan weg + local headnode = start == head + local ok + start, ok = handler(start,dataset[4],lookupname,lookupmatch,sequence,lookuphash,1) + if ok then + success = true + if headnode then + head = start + end + end + end + if start then start = start.next end + else + start = start.next + end + else + start = start.next + end + elseif id == whatsit_code then -- will be function + local subtype = start.subtype + if subtype == dir_code then + local dir = start.dir + if dir == "+TRT" or dir == "+TLT" then + topstack = topstack + 1 + dirstack[topstack] = dir + elseif dir == "-TRT" or dir == "-TLT" then + topstack = topstack - 1 + end + local newdir = dirstack[topstack] + if newdir == "+TRT" then + rlmode = -1 + elseif newdir == "+TLT" then + rlmode = 1 + else + rlmode = rlparmode + end + if trace_directions then + report_process("directions after txtdir %s: txtdir=%s:%s, parmode=%s, txtmode=%s",dir,topstack,newdir or "unset",rlparmode,rlmode) + end + elseif subtype == localpar_code then + local dir = start.dir + if dir == "TRT" then + rlparmode = -1 + elseif dir == "TLT" then + rlparmode = 1 + else + rlparmode = 0 + end + rlmode = rlparmode + if trace_directions then + report_process("directions after pardir %s: parmode=%s, txtmode=%s",dir,rlparmode,rlmode) + end + end + start = start.next + else + start = start.next + end + end + end + else + while start do + local id = start.id + if id == glyph_code then + if start.subtype<256 and start.font == font then + local a = has_attribute(start,0) + if a then + a = (a == attr) and (not attribute or has_attribute(start,state,attribute)) + else + a = not attribute or has_attribute(start,state,attribute) + end + if a then + for i=1,ns do + local lookupname = subtables[i] + local lookupcache = lookuphash[lookupname] + if lookupcache then + local lookupmatch = lookupcache[start.char] + if lookupmatch then + -- we could move all code inline but that makes things even more unreadable + local headnode = start == head + local ok + start, ok = handler(start,dataset[4],lookupname,lookupmatch,sequence,lookuphash,i) + if ok then + success = true + if headnode then + head = start + end + break + end + end + else + report_missing_cache(typ,lookupname) + end + end + if start then start = start.next end + else + start = start.next + end + else + start = start.next + end + elseif id == whatsit_code then + local subtype = start.subtype + if subtype == dir_code then + local dir = start.dir + if dir == "+TRT" or dir == "+TLT" then + topstack = topstack + 1 + dirstack[topstack] = dir + elseif dir == "-TRT" or dir == "-TLT" then + topstack = topstack - 1 + end + local newdir = dirstack[topstack] + if newdir == "+TRT" then + rlmode = -1 + elseif newdir == "+TLT" then + rlmode = 1 + else + rlmode = rlparmode + end + if trace_directions then + report_process("directions after txtdir %s: txtdir=%s:%s, parmode=%s, txtmode=%s",dir,topstack,newdir or "unset",rlparmode,rlmode) + end + elseif subtype == localpar_code then + local dir = start.dir + if dir == "TRT" then + rlparmode = -1 + elseif dir == "TLT" then + rlparmode = 1 + else + rlparmode = 0 + end + rlmode = rlparmode + if trace_directions then + report_process("directions after pardir %s: parmode=%s, txtmode=%s",dir,rlparmode,rlmode) + end + end + start = start.next + else + start = start.next + end + end + end + end + if success then + done = true + end + if trace_steps then -- ? + registerstep(head) + end + end + end + end + return head, done +end -local fonts, nodes, node = fonts, nodes, node +local function generic(lookupdata,lookupname,unicode,lookuphash) + local target = lookuphash[lookupname] + if target then + target[unicode] = lookupdata + else + lookuphash[lookupname] = { [unicode] = lookupdata } + end +end -local allocate = utilities.storage.allocate +local action = { -local otf = fonts.handlers.otf + substitution = generic, + multiple = generic, + alternate = generic, + position = generic, -local analyzers = fonts.analyzers -local initializers = allocate() -local methods = allocate() + ligature = function(lookupdata,lookupname,unicode,lookuphash) + local target = lookuphash[lookupname] + if not target then + target = { } + lookuphash[lookupname] = target + end + for i=1,#lookupdata do + local li = lookupdata[i] + local tu = target[li] + if not tu then + tu = { } + target[li] = tu + end + target = tu + end + target.ligature = unicode + end, -analyzers.initializers = initializers -analyzers.methods = methods -analyzers.useunicodemarks = false + pair = function(lookupdata,lookupname,unicode,lookuphash) + local target = lookuphash[lookupname] + if not target then + target = { } + lookuphash[lookupname] = target + end + local others = target[unicode] + local paired = lookupdata[1] + if others then + others[paired] = lookupdata + else + others = { [paired] = lookupdata } + target[unicode] = others + end + end, -local nodecodes = nodes.nodecodes -local glyph_code = nodecodes.glyph +} -local set_attribute = node.set_attribute -local has_attribute = node.has_attribute -local traverse_id = node.traverse_id -local traverse_node_list = node.traverse +local function prepare_lookups(tfmdata) -local fontdata = fonts.hashes.identifiers -local state = attributes.private('state') -local categories = characters and characters.categories or { } -- sorry, only in context + local rawdata = tfmdata.shared.rawdata + local resources = rawdata.resources + local lookuphash = resources.lookuphash + local anchor_to_lookup = resources.anchor_to_lookup + local lookup_to_anchor = resources.lookup_to_anchor + local lookuptypes = resources.lookuptypes + local characters = tfmdata.characters + local descriptions = tfmdata.descriptions -local tracers = nodes.tracers -local colortracers = tracers and tracers.colors -local setnodecolor = colortracers and colortracers.set or function() end -local resetnodecolor = colortracers and colortracers.reset or function() end + -- we cannot free the entries in the descriptions as sometimes we access + -- then directly (for instance anchors) ... selectively freeing does save + -- much memory as it's only a reference to a table and the slot in the + -- description hash is not freed anyway -local otffeatures = fonts.constructors.newfeatures("otf") -local registerotffeature = otffeatures.register + for unicode, character in next, characters do -- we cannot loop over descriptions ! ---[[ldx-- -

Analyzers run per script and/or language and are needed in order to -process features right.

---ldx]]-- + local description = descriptions[unicode] --- todo: analyzers per script/lang, cross font, so we need an font id hash -> script --- e.g. latin -> hyphenate, arab -> 1/2/3 analyze -- its own namespace + if description then -local state = attributes.private('state') + local lookups = description.slookups + if lookups then + for lookupname, lookupdata in next, lookups do + action[lookuptypes[lookupname]](lookupdata,lookupname,unicode,lookuphash) + end + end -function analyzers.setstate(head,font) - local useunicodemarks = analyzers.useunicodemarks - local tfmdata = fontdata[font] - local characters = tfmdata.characters - local descriptions = tfmdata.descriptions - local first, last, current, n, done = nil, nil, head, 0, false -- maybe make n boolean - while current do - local id = current.id - if id == glyph_code and current.font == font then - local char = current.char - local d = descriptions[char] - if d then - if d.class == "mark" or (useunicodemarks and categories[char] == "mn") then - done = true - set_attribute(current,state,5) -- mark - elseif n == 0 then - first, last, n = current, current, 1 - set_attribute(current,state,1) -- init - else - last, n = current, n+1 - set_attribute(current,state,2) -- medi + local lookups = description.mlookups + if lookups then + for lookupname, lookuplist in next, lookups do + local lookuptype = lookuptypes[lookupname] + for l=1,#lookuplist do + local lookupdata = lookuplist[l] + action[lookuptype](lookupdata,lookupname,unicode,lookuphash) + end end - else -- finish - if first and first == last then - set_attribute(last,state,4) -- isol - elseif last then - set_attribute(last,state,3) -- fina + end + + local list = description.kerns + if list then + for lookup, krn in next, list do -- ref to glyph, saves lookup + local target = lookuphash[lookup] + if target then + target[unicode] = krn + else + lookuphash[lookup] = { [unicode] = krn } + end end - first, last, n = nil, nil, 0 end - elseif id == disc_code then - -- always in the middle - set_attribute(current,state,2) -- midi - last = current - else -- finish - if first and first == last then - set_attribute(last,state,4) -- isol - elseif last then - set_attribute(last,state,3) -- fina + + local list = description.anchors + if list then + for typ, anchors in next, list do -- types + if typ == "mark" or typ == "cexit" then -- or entry? + for name, anchor in next, anchors do + local lookups = anchor_to_lookup[name] + if lookups then + for lookup, _ in next, lookups do + local target = lookuphash[lookup] + if target then + target[unicode] = anchors + else + lookuphash[lookup] = { [unicode] = anchors } + end + end + end + end + end + end end - first, last, n = nil, nil, 0 + end - current = current.next - end - if first and first == last then - set_attribute(last,state,4) -- isol - elseif last then - set_attribute(last,state,3) -- fina + end - return head, done -end --- in the future we will use language/script attributes instead of the --- font related value, but then we also need dynamic features which is --- somewhat slower; and .. we need a chain of them +end -local function analyzeinitializer(tfmdata,value) -- attr - local script, language = otf.scriptandlanguage(tfmdata) -- attr - local action = initializers[script] - if action then - if type(action) == "function" then - return action(tfmdata,value) - else - local action = action[language] - if action then - return action(tfmdata,value) - end - end +local function split(replacement,original) + local result = { } + for i=1,#replacement do + result[original[i]] = replacement[i] end + return result end -local function analyzeprocessor(head,font,attr) - local tfmdata = fontdata[font] - local script, language = otf.scriptandlanguage(tfmdata,attr) - local action = methods[script] - if action then - if type(action) == "function" then - return action(head,font,attr) - else - action = action[language] - if action then - return action(head,font,attr) +local valid = { + coverage = { chainsub = true, chainpos = true, contextsub = true }, + reversecoverage = { reversesub = true }, + glyphs = { chainsub = true, chainpos = true }, +} + +local function prepare_contextchains(tfmdata) + local rawdata = tfmdata.shared.rawdata + local resources = rawdata.resources + local lookuphash = resources.lookuphash + local lookups = rawdata.lookups + if lookups then + for lookupname, lookupdata in next, rawdata.lookups do + local lookuptype = lookupdata.type + if lookuptype then + local rules = lookupdata.rules + if rules then + local format = lookupdata.format + local validformat = valid[format] + if not validformat then + report_prepare("unsupported format %s",format) + elseif not validformat[lookuptype] then + -- todo: dejavu-serif has one (but i need to see what use it has) + report_prepare("unsupported %s %s for %s",format,lookuptype,lookupname) + else + local contexts = lookuphash[lookupname] + if not contexts then + contexts = { } + lookuphash[lookupname] = contexts + end + local t, nt = { }, 0 + for nofrules=1,#rules do + local rule = rules[nofrules] + local current = rule.current + local before = rule.before + local after = rule.after + local replacements = rule.replacements + local sequence = { } + local nofsequences = 0 + -- Wventually we can store start, stop and sequence in the cached file + -- but then less sharing takes place so best not do that without a lot + -- of profiling so let's forget about it. + if before then + for n=1,#before do + nofsequences = nofsequences + 1 + sequence[nofsequences] = before[n] + end + end + local start = nofsequences + 1 + for n=1,#current do + nofsequences = nofsequences + 1 + sequence[nofsequences] = current[n] + end + local stop = nofsequences + if after then + for n=1,#after do + nofsequences = nofsequences + 1 + sequence[nofsequences] = after[n] + end + end + if sequence[1] then + -- Replacements only happen with reverse lookups as they are single only. We + -- could pack them into current (replacement value instead of true) and then + -- use sequence[start] instead but it's somewhat ugly. + nt = nt + 1 + t[nt] = { nofrules, lookuptype, sequence, start, stop, rule.lookups, replacements } + for unic, _ in next, sequence[start] do + local cu = contexts[unic] + if not cu then + contexts[unic] = t + end + end + end + end + end + else + -- no rules + end + else + report_prepare("missing lookuptype for %s",lookupname) end end end - return head, false end -registerotffeature { - name = "analyze", - description = "analysis of (for instance) character classes", - default = true, - initializers = { - node = analyzeinitializer, - }, - processors = { - position = 1, - node = analyzeprocessor, - } -} - --- latin - -methods.latn = analyzers.setstate - --- this info eventually will go into char-def and we will have a state --- table for generic then - -local zwnj = 0x200C -local zwj = 0x200D - -local isol = { - [0x0600] = true, [0x0601] = true, [0x0602] = true, [0x0603] = true, - [0x0608] = true, [0x060B] = true, [0x0621] = true, [0x0674] = true, - [0x06DD] = true, [zwnj] = true, -} - -local isol_fina = { - [0x0622] = true, [0x0623] = true, [0x0624] = true, [0x0625] = true, - [0x0627] = true, [0x0629] = true, [0x062F] = true, [0x0630] = true, - [0x0631] = true, [0x0632] = true, [0x0648] = true, [0x0671] = true, - [0x0672] = true, [0x0673] = true, [0x0675] = true, [0x0676] = true, - [0x0677] = true, [0x0688] = true, [0x0689] = true, [0x068A] = true, - [0x068B] = true, [0x068C] = true, [0x068D] = true, [0x068E] = true, - [0x068F] = true, [0x0690] = true, [0x0691] = true, [0x0692] = true, - [0x0693] = true, [0x0694] = true, [0x0695] = true, [0x0696] = true, - [0x0697] = true, [0x0698] = true, [0x0699] = true, [0x06C0] = true, - [0x06C3] = true, [0x06C4] = true, [0x06C5] = true, [0x06C6] = true, - [0x06C7] = true, [0x06C8] = true, [0x06C9] = true, [0x06CA] = true, - [0x06CB] = true, [0x06CD] = true, [0x06CF] = true, [0x06D2] = true, - [0x06D3] = true, [0x06D5] = true, [0x06EE] = true, [0x06EF] = true, - [0x0759] = true, [0x075A] = true, [0x075B] = true, [0x076B] = true, - [0x076C] = true, [0x0771] = true, [0x0773] = true, [0x0774] = true, - [0x0778] = true, [0x0779] = true, [0xFEF5] = true, [0xFEF7] = true, - [0xFEF9] = true, [0xFEFB] = true, - - -- syriac - - [0x0710] = true, [0x0715] = true, [0x0716] = true, [0x0717] = true, - [0x0718] = true, [0x0719] = true, [0x0728] = true, [0x072A] = true, - [0x072C] = true, [0x071E] = true, -} +-- we can consider lookuphash == false (initialized but empty) vs lookuphash == table -local isol_fina_medi_init = { - [0x0626] = true, [0x0628] = true, [0x062A] = true, [0x062B] = true, - [0x062C] = true, [0x062D] = true, [0x062E] = true, [0x0633] = true, - [0x0634] = true, [0x0635] = true, [0x0636] = true, [0x0637] = true, - [0x0638] = true, [0x0639] = true, [0x063A] = true, [0x063B] = true, - [0x063C] = true, [0x063D] = true, [0x063E] = true, [0x063F] = true, - [0x0640] = true, [0x0641] = true, [0x0642] = true, [0x0643] = true, - [0x0644] = true, [0x0645] = true, [0x0646] = true, [0x0647] = true, - [0x0649] = true, [0x064A] = true, [0x066E] = true, [0x066F] = true, - [0x0678] = true, [0x0679] = true, [0x067A] = true, [0x067B] = true, - [0x067C] = true, [0x067D] = true, [0x067E] = true, [0x067F] = true, - [0x0680] = true, [0x0681] = true, [0x0682] = true, [0x0683] = true, - [0x0684] = true, [0x0685] = true, [0x0686] = true, [0x0687] = true, - [0x069A] = true, [0x069B] = true, [0x069C] = true, [0x069D] = true, - [0x069E] = true, [0x069F] = true, [0x06A0] = true, [0x06A1] = true, - [0x06A2] = true, [0x06A3] = true, [0x06A4] = true, [0x06A5] = true, - [0x06A6] = true, [0x06A7] = true, [0x06A8] = true, [0x06A9] = true, - [0x06AA] = true, [0x06AB] = true, [0x06AC] = true, [0x06AD] = true, - [0x06AE] = true, [0x06AF] = true, [0x06B0] = true, [0x06B1] = true, - [0x06B2] = true, [0x06B3] = true, [0x06B4] = true, [0x06B5] = true, - [0x06B6] = true, [0x06B7] = true, [0x06B8] = true, [0x06B9] = true, - [0x06BA] = true, [0x06BB] = true, [0x06BC] = true, [0x06BD] = true, - [0x06BE] = true, [0x06BF] = true, [0x06C1] = true, [0x06C2] = true, - [0x06CC] = true, [0x06CE] = true, [0x06D0] = true, [0x06D1] = true, - [0x06FA] = true, [0x06FB] = true, [0x06FC] = true, [0x06FF] = true, - [0x0750] = true, [0x0751] = true, [0x0752] = true, [0x0753] = true, - [0x0754] = true, [0x0755] = true, [0x0756] = true, [0x0757] = true, - [0x0758] = true, [0x075C] = true, [0x075D] = true, [0x075E] = true, - [0x075F] = true, [0x0760] = true, [0x0761] = true, [0x0762] = true, - [0x0763] = true, [0x0764] = true, [0x0765] = true, [0x0766] = true, - [0x0767] = true, [0x0768] = true, [0x0769] = true, [0x076A] = true, - [0x076D] = true, [0x076E] = true, [0x076F] = true, [0x0770] = true, - [0x0772] = true, [0x0775] = true, [0x0776] = true, [0x0777] = true, - [0x077A] = true, [0x077B] = true, [0x077C] = true, [0x077D] = true, - [0x077E] = true, [0x077F] = true, +local function featuresinitializer(tfmdata,value) + if true then -- value then + -- beware we need to use the topmost properties table + local rawdata = tfmdata.shared.rawdata + local properties = rawdata.properties + if not properties.initialized then + local starttime = trace_preparing and os.clock() + local resources = rawdata.resources + resources.lookuphash = resources.lookuphash or { } + prepare_contextchains(tfmdata) + prepare_lookups(tfmdata) + properties.initialized = true + if trace_preparing then + report_prepare("preparation time is %0.3f seconds for %s",os.clock()-starttime,tfmdata.properties.fullname or "?") + end + end + end +end - -- syriac +registerotffeature { + name = "features", + description = "features", + default = true, + initializers = { + position = 1, + node = featuresinitializer, + }, + processors = { + node = featuresprocessor, + } +} - [0x0712] = true, [0x0713] = true, [0x0714] = true, [0x071A] = true, - [0x071B] = true, [0x071C] = true, [0x071D] = true, [0x071F] = true, - [0x0720] = true, [0x0721] = true, [0x0722] = true, [0x0723] = true, - [0x0724] = true, [0x0725] = true, [0x0726] = true, [0x0727] = true, - [0x0729] = true, [0x072B] = true, +-- this will change but is needed for an experiment: - -- also +otf.handlers = handlers - [zwj] = true, +end -- closure + +do -- begin closure to overcome local limits and interference + +if not modules then modules = { } end modules ['luatex-fonts-chr'] = { + version = 1.001, + comment = "companion to luatex-fonts.lua", + author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", + copyright = "PRAGMA ADE / ConTeXt Development Team", + license = "see context related readme files" +} + +if context then + texio.write_nl("fatal error: this module is not for context") + os.exit() +end + +characters = characters or { } +characters.categories = { + [0x0300]="mn", + [0x0301]="mn", + [0x0302]="mn", + [0x0303]="mn", + [0x0304]="mn", + [0x0305]="mn", + [0x0306]="mn", + [0x0307]="mn", + [0x0308]="mn", + [0x0309]="mn", + [0x030A]="mn", + [0x030B]="mn", + [0x030C]="mn", + [0x030D]="mn", + [0x030E]="mn", + [0x030F]="mn", + [0x0310]="mn", + [0x0311]="mn", + [0x0312]="mn", + [0x0313]="mn", + [0x0314]="mn", + [0x0315]="mn", + [0x0316]="mn", + [0x0317]="mn", + [0x0318]="mn", + [0x0319]="mn", + [0x031A]="mn", + [0x031B]="mn", + [0x031C]="mn", + [0x031D]="mn", + [0x031E]="mn", + [0x031F]="mn", + [0x0320]="mn", + [0x0321]="mn", + [0x0322]="mn", + [0x0323]="mn", + [0x0324]="mn", + [0x0325]="mn", + [0x0326]="mn", + [0x0327]="mn", + [0x0328]="mn", + [0x0329]="mn", + [0x032A]="mn", + [0x032B]="mn", + [0x032C]="mn", + [0x032D]="mn", + [0x032E]="mn", + [0x032F]="mn", + [0x0330]="mn", + [0x0331]="mn", + [0x0332]="mn", + [0x0333]="mn", + [0x0334]="mn", + [0x0335]="mn", + [0x0336]="mn", + [0x0337]="mn", + [0x0338]="mn", + [0x0339]="mn", + [0x033A]="mn", + [0x033B]="mn", + [0x033C]="mn", + [0x033D]="mn", + [0x033E]="mn", + [0x033F]="mn", + [0x0340]="mn", + [0x0341]="mn", + [0x0342]="mn", + [0x0343]="mn", + [0x0344]="mn", + [0x0345]="mn", + [0x0346]="mn", + [0x0347]="mn", + [0x0348]="mn", + [0x0349]="mn", + [0x034A]="mn", + [0x034B]="mn", + [0x034C]="mn", + [0x034D]="mn", + [0x034E]="mn", + [0x034F]="mn", + [0x0350]="mn", + [0x0351]="mn", + [0x0352]="mn", + [0x0353]="mn", + [0x0354]="mn", + [0x0355]="mn", + [0x0356]="mn", + [0x0357]="mn", + [0x0358]="mn", + [0x0359]="mn", + [0x035A]="mn", + [0x035B]="mn", + [0x035C]="mn", + [0x035D]="mn", + [0x035E]="mn", + [0x035F]="mn", + [0x0360]="mn", + [0x0361]="mn", + [0x0362]="mn", + [0x0363]="mn", + [0x0364]="mn", + [0x0365]="mn", + [0x0366]="mn", + [0x0367]="mn", + [0x0368]="mn", + [0x0369]="mn", + [0x036A]="mn", + [0x036B]="mn", + [0x036C]="mn", + [0x036D]="mn", + [0x036E]="mn", + [0x036F]="mn", + [0x0483]="mn", + [0x0484]="mn", + [0x0485]="mn", + [0x0486]="mn", + [0x0591]="mn", + [0x0592]="mn", + [0x0593]="mn", + [0x0594]="mn", + [0x0595]="mn", + [0x0596]="mn", + [0x0597]="mn", + [0x0598]="mn", + [0x0599]="mn", + [0x059A]="mn", + [0x059B]="mn", + [0x059C]="mn", + [0x059D]="mn", + [0x059E]="mn", + [0x059F]="mn", + [0x05A0]="mn", + [0x05A1]="mn", + [0x05A2]="mn", + [0x05A3]="mn", + [0x05A4]="mn", + [0x05A5]="mn", + [0x05A6]="mn", + [0x05A7]="mn", + [0x05A8]="mn", + [0x05A9]="mn", + [0x05AA]="mn", + [0x05AB]="mn", + [0x05AC]="mn", + [0x05AD]="mn", + [0x05AE]="mn", + [0x05AF]="mn", + [0x05B0]="mn", + [0x05B1]="mn", + [0x05B2]="mn", + [0x05B3]="mn", + [0x05B4]="mn", + [0x05B5]="mn", + [0x05B6]="mn", + [0x05B7]="mn", + [0x05B8]="mn", + [0x05B9]="mn", + [0x05BA]="mn", + [0x05BB]="mn", + [0x05BC]="mn", + [0x05BD]="mn", + [0x05BF]="mn", + [0x05C1]="mn", + [0x05C2]="mn", + [0x05C4]="mn", + [0x05C5]="mn", + [0x05C7]="mn", + [0x0610]="mn", + [0x0611]="mn", + [0x0612]="mn", + [0x0613]="mn", + [0x0614]="mn", + [0x0615]="mn", + [0x064B]="mn", + [0x064C]="mn", + [0x064D]="mn", + [0x064E]="mn", + [0x064F]="mn", + [0x0650]="mn", + [0x0651]="mn", + [0x0652]="mn", + [0x0653]="mn", + [0x0654]="mn", + [0x0655]="mn", + [0x0656]="mn", + [0x0657]="mn", + [0x0658]="mn", + [0x0659]="mn", + [0x065A]="mn", + [0x065B]="mn", + [0x065C]="mn", + [0x065D]="mn", + [0x065E]="mn", + [0x0670]="mn", + [0x06D6]="mn", + [0x06D7]="mn", + [0x06D8]="mn", + [0x06D9]="mn", + [0x06DA]="mn", + [0x06DB]="mn", + [0x06DC]="mn", + [0x06DF]="mn", + [0x06E0]="mn", + [0x06E1]="mn", + [0x06E2]="mn", + [0x06E3]="mn", + [0x06E4]="mn", + [0x06E7]="mn", + [0x06E8]="mn", + [0x06EA]="mn", + [0x06EB]="mn", + [0x06EC]="mn", + [0x06ED]="mn", + [0x0711]="mn", + [0x0730]="mn", + [0x0731]="mn", + [0x0732]="mn", + [0x0733]="mn", + [0x0734]="mn", + [0x0735]="mn", + [0x0736]="mn", + [0x0737]="mn", + [0x0738]="mn", + [0x0739]="mn", + [0x073A]="mn", + [0x073B]="mn", + [0x073C]="mn", + [0x073D]="mn", + [0x073E]="mn", + [0x073F]="mn", + [0x0740]="mn", + [0x0741]="mn", + [0x0742]="mn", + [0x0743]="mn", + [0x0744]="mn", + [0x0745]="mn", + [0x0746]="mn", + [0x0747]="mn", + [0x0748]="mn", + [0x0749]="mn", + [0x074A]="mn", + [0x07A6]="mn", + [0x07A7]="mn", + [0x07A8]="mn", + [0x07A9]="mn", + [0x07AA]="mn", + [0x07AB]="mn", + [0x07AC]="mn", + [0x07AD]="mn", + [0x07AE]="mn", + [0x07AF]="mn", + [0x07B0]="mn", + [0x07EB]="mn", + [0x07EC]="mn", + [0x07ED]="mn", + [0x07EE]="mn", + [0x07EF]="mn", + [0x07F0]="mn", + [0x07F1]="mn", + [0x07F2]="mn", + [0x07F3]="mn", + [0x0901]="mn", + [0x0902]="mn", + [0x093C]="mn", + [0x0941]="mn", + [0x0942]="mn", + [0x0943]="mn", + [0x0944]="mn", + [0x0945]="mn", + [0x0946]="mn", + [0x0947]="mn", + [0x0948]="mn", + [0x094D]="mn", + [0x0951]="mn", + [0x0952]="mn", + [0x0953]="mn", + [0x0954]="mn", + [0x0962]="mn", + [0x0963]="mn", + [0x0981]="mn", + [0x09BC]="mn", + [0x09C1]="mn", + [0x09C2]="mn", + [0x09C3]="mn", + [0x09C4]="mn", + [0x09CD]="mn", + [0x09E2]="mn", + [0x09E3]="mn", + [0x0A01]="mn", + [0x0A02]="mn", + [0x0A3C]="mn", + [0x0A41]="mn", + [0x0A42]="mn", + [0x0A47]="mn", + [0x0A48]="mn", + [0x0A4B]="mn", + [0x0A4C]="mn", + [0x0A4D]="mn", + [0x0A70]="mn", + [0x0A71]="mn", + [0x0A81]="mn", + [0x0A82]="mn", + [0x0ABC]="mn", + [0x0AC1]="mn", + [0x0AC2]="mn", + [0x0AC3]="mn", + [0x0AC4]="mn", + [0x0AC5]="mn", + [0x0AC7]="mn", + [0x0AC8]="mn", + [0x0ACD]="mn", + [0x0AE2]="mn", + [0x0AE3]="mn", + [0x0B01]="mn", + [0x0B3C]="mn", + [0x0B3F]="mn", + [0x0B41]="mn", + [0x0B42]="mn", + [0x0B43]="mn", + [0x0B4D]="mn", + [0x0B56]="mn", + [0x0B82]="mn", + [0x0BC0]="mn", + [0x0BCD]="mn", + [0x0C3E]="mn", + [0x0C3F]="mn", + [0x0C40]="mn", + [0x0C46]="mn", + [0x0C47]="mn", + [0x0C48]="mn", + [0x0C4A]="mn", + [0x0C4B]="mn", + [0x0C4C]="mn", + [0x0C4D]="mn", + [0x0C55]="mn", + [0x0C56]="mn", + [0x0CBC]="mn", + [0x0CBF]="mn", + [0x0CC6]="mn", + [0x0CCC]="mn", + [0x0CCD]="mn", + [0x0CE2]="mn", + [0x0CE3]="mn", + [0x0D41]="mn", + [0x0D42]="mn", + [0x0D43]="mn", + [0x0D4D]="mn", + [0x0DCA]="mn", + [0x0DD2]="mn", + [0x0DD3]="mn", + [0x0DD4]="mn", + [0x0DD6]="mn", + [0x0E31]="mn", + [0x0E34]="mn", + [0x0E35]="mn", + [0x0E36]="mn", + [0x0E37]="mn", + [0x0E38]="mn", + [0x0E39]="mn", + [0x0E3A]="mn", + [0x0E47]="mn", + [0x0E48]="mn", + [0x0E49]="mn", + [0x0E4A]="mn", + [0x0E4B]="mn", + [0x0E4C]="mn", + [0x0E4D]="mn", + [0x0E4E]="mn", + [0x0EB1]="mn", + [0x0EB4]="mn", + [0x0EB5]="mn", + [0x0EB6]="mn", + [0x0EB7]="mn", + [0x0EB8]="mn", + [0x0EB9]="mn", + [0x0EBB]="mn", + [0x0EBC]="mn", + [0x0EC8]="mn", + [0x0EC9]="mn", + [0x0ECA]="mn", + [0x0ECB]="mn", + [0x0ECC]="mn", + [0x0ECD]="mn", + [0x0F18]="mn", + [0x0F19]="mn", + [0x0F35]="mn", + [0x0F37]="mn", + [0x0F39]="mn", + [0x0F71]="mn", + [0x0F72]="mn", + [0x0F73]="mn", + [0x0F74]="mn", + [0x0F75]="mn", + [0x0F76]="mn", + [0x0F77]="mn", + [0x0F78]="mn", + [0x0F79]="mn", + [0x0F7A]="mn", + [0x0F7B]="mn", + [0x0F7C]="mn", + [0x0F7D]="mn", + [0x0F7E]="mn", + [0x0F80]="mn", + [0x0F81]="mn", + [0x0F82]="mn", + [0x0F83]="mn", + [0x0F84]="mn", + [0x0F86]="mn", + [0x0F87]="mn", + [0x0F90]="mn", + [0x0F91]="mn", + [0x0F92]="mn", + [0x0F93]="mn", + [0x0F94]="mn", + [0x0F95]="mn", + [0x0F96]="mn", + [0x0F97]="mn", + [0x0F99]="mn", + [0x0F9A]="mn", + [0x0F9B]="mn", + [0x0F9C]="mn", + [0x0F9D]="mn", + [0x0F9E]="mn", + [0x0F9F]="mn", + [0x0FA0]="mn", + [0x0FA1]="mn", + [0x0FA2]="mn", + [0x0FA3]="mn", + [0x0FA4]="mn", + [0x0FA5]="mn", + [0x0FA6]="mn", + [0x0FA7]="mn", + [0x0FA8]="mn", + [0x0FA9]="mn", + [0x0FAA]="mn", + [0x0FAB]="mn", + [0x0FAC]="mn", + [0x0FAD]="mn", + [0x0FAE]="mn", + [0x0FAF]="mn", + [0x0FB0]="mn", + [0x0FB1]="mn", + [0x0FB2]="mn", + [0x0FB3]="mn", + [0x0FB4]="mn", + [0x0FB5]="mn", + [0x0FB6]="mn", + [0x0FB7]="mn", + [0x0FB8]="mn", + [0x0FB9]="mn", + [0x0FBA]="mn", + [0x0FBB]="mn", + [0x0FBC]="mn", + [0x0FC6]="mn", + [0x102D]="mn", + [0x102E]="mn", + [0x102F]="mn", + [0x1030]="mn", + [0x1032]="mn", + [0x1036]="mn", + [0x1037]="mn", + [0x1039]="mn", + [0x1058]="mn", + [0x1059]="mn", + [0x135F]="mn", + [0x1712]="mn", + [0x1713]="mn", + [0x1714]="mn", + [0x1732]="mn", + [0x1733]="mn", + [0x1734]="mn", + [0x1752]="mn", + [0x1753]="mn", + [0x1772]="mn", + [0x1773]="mn", + [0x17B7]="mn", + [0x17B8]="mn", + [0x17B9]="mn", + [0x17BA]="mn", + [0x17BB]="mn", + [0x17BC]="mn", + [0x17BD]="mn", + [0x17C6]="mn", + [0x17C9]="mn", + [0x17CA]="mn", + [0x17CB]="mn", + [0x17CC]="mn", + [0x17CD]="mn", + [0x17CE]="mn", + [0x17CF]="mn", + [0x17D0]="mn", + [0x17D1]="mn", + [0x17D2]="mn", + [0x17D3]="mn", + [0x17DD]="mn", + [0x180B]="mn", + [0x180C]="mn", + [0x180D]="mn", + [0x18A9]="mn", + [0x1920]="mn", + [0x1921]="mn", + [0x1922]="mn", + [0x1927]="mn", + [0x1928]="mn", + [0x1932]="mn", + [0x1939]="mn", + [0x193A]="mn", + [0x193B]="mn", + [0x1A17]="mn", + [0x1A18]="mn", + [0x1B00]="mn", + [0x1B01]="mn", + [0x1B02]="mn", + [0x1B03]="mn", + [0x1B34]="mn", + [0x1B36]="mn", + [0x1B37]="mn", + [0x1B38]="mn", + [0x1B39]="mn", + [0x1B3A]="mn", + [0x1B3C]="mn", + [0x1B42]="mn", + [0x1B6B]="mn", + [0x1B6C]="mn", + [0x1B6D]="mn", + [0x1B6E]="mn", + [0x1B6F]="mn", + [0x1B70]="mn", + [0x1B71]="mn", + [0x1B72]="mn", + [0x1B73]="mn", + [0x1DC0]="mn", + [0x1DC1]="mn", + [0x1DC2]="mn", + [0x1DC3]="mn", + [0x1DC4]="mn", + [0x1DC5]="mn", + [0x1DC6]="mn", + [0x1DC7]="mn", + [0x1DC8]="mn", + [0x1DC9]="mn", + [0x1DCA]="mn", + [0x1DFE]="mn", + [0x1DFF]="mn", + [0x20D0]="mn", + [0x20D1]="mn", + [0x20D2]="mn", + [0x20D3]="mn", + [0x20D4]="mn", + [0x20D5]="mn", + [0x20D6]="mn", + [0x20D7]="mn", + [0x20D8]="mn", + [0x20D9]="mn", + [0x20DA]="mn", + [0x20DB]="mn", + [0x20DC]="mn", + [0x20E1]="mn", + [0x20E5]="mn", + [0x20E6]="mn", + [0x20E7]="mn", + [0x20E8]="mn", + [0x20E9]="mn", + [0x20EA]="mn", + [0x20EB]="mn", + [0x20EC]="mn", + [0x20ED]="mn", + [0x20EE]="mn", + [0x20EF]="mn", + [0x302A]="mn", + [0x302B]="mn", + [0x302C]="mn", + [0x302D]="mn", + [0x302E]="mn", + [0x302F]="mn", + [0x3099]="mn", + [0x309A]="mn", + [0xA806]="mn", + [0xA80B]="mn", + [0xA825]="mn", + [0xA826]="mn", + [0xFB1E]="mn", + [0xFE00]="mn", + [0xFE01]="mn", + [0xFE02]="mn", + [0xFE03]="mn", + [0xFE04]="mn", + [0xFE05]="mn", + [0xFE06]="mn", + [0xFE07]="mn", + [0xFE08]="mn", + [0xFE09]="mn", + [0xFE0A]="mn", + [0xFE0B]="mn", + [0xFE0C]="mn", + [0xFE0D]="mn", + [0xFE0E]="mn", + [0xFE0F]="mn", + [0xFE20]="mn", + [0xFE21]="mn", + [0xFE22]="mn", + [0xFE23]="mn", } -local arab_warned = { } - - --- todo: gref - -local function warning(current,what) - local char = current.char - if not arab_warned[char] then - log.report("analyze","arab: character %s (U+%05X) has no %s class", char, char, what) - arab_warned[char] = true - end -end - -function methods.nocolor(head,font,attr) - for n in traverse_id(glyph_code,head) do - if not font or n.font == font then - resetnodecolor(n) - end - end - return head, true -end - -local function finish(first,last) - if last then - if first == last then - local fc = first.char - if isol_fina_medi_init[fc] or isol_fina[fc] then - set_attribute(first,state,4) -- isol - if trace_analyzing then setnodecolor(first,"font:isol") end - else - warning(first,"isol") - set_attribute(first,state,0) -- error - if trace_analyzing then resetnodecolor(first) end - end - else - local lc = last.char - if isol_fina_medi_init[lc] or isol_fina[lc] then -- why isol here ? - -- if laststate == 1 or laststate == 2 or laststate == 4 then - set_attribute(last,state,3) -- fina - if trace_analyzing then setnodecolor(last,"font:fina") end - else - warning(last,"fina") - set_attribute(last,state,0) -- error - if trace_analyzing then resetnodecolor(last) end - end - end - first, last = nil, nil - elseif first then - -- first and last are either both set so we never com here - local fc = first.char - if isol_fina_medi_init[fc] or isol_fina[fc] then - set_attribute(first,state,4) -- isol - if trace_analyzing then setnodecolor(first,"font:isol") end - else - warning(first,"isol") - set_attribute(first,state,0) -- error - if trace_analyzing then resetnodecolor(first) end - end - first = nil - end - return first, last -end - -function methods.arab(head,font,attr) -- maybe make a special version with no trace - local useunicodemarks = analyzers.useunicodemarks - local tfmdata = fontdata[font] - local marks = tfmdata.resources.marks - local first, last, current, done = nil, nil, head, false - while current do - if current.id == glyph_code and current.subtype<256 and current.font == font and not has_attribute(current,state) then - done = true - local char = current.char - if marks[char] or (useunicodemarks and categories[char] == "mn") then - set_attribute(current,state,5) -- mark - if trace_analyzing then setnodecolor(current,"font:mark") end - elseif isol[char] then -- can be zwj or zwnj too - first, last = finish(first,last) - set_attribute(current,state,4) -- isol - if trace_analyzing then setnodecolor(current,"font:isol") end - first, last = nil, nil - elseif not first then - if isol_fina_medi_init[char] then - set_attribute(current,state,1) -- init - if trace_analyzing then setnodecolor(current,"font:init") end - first, last = first or current, current - elseif isol_fina[char] then - set_attribute(current,state,4) -- isol - if trace_analyzing then setnodecolor(current,"font:isol") end - first, last = nil, nil - else -- no arab - first, last = finish(first,last) - end - elseif isol_fina_medi_init[char] then - first, last = first or current, current - set_attribute(current,state,2) -- medi - if trace_analyzing then setnodecolor(current,"font:medi") end - elseif isol_fina[char] then - if not has_attribute(last,state,1) then - -- tricky, we need to check what last may be ! - set_attribute(last,state,2) -- medi - if trace_analyzing then setnodecolor(last,"font:medi") end - end - set_attribute(current,state,3) -- fina - if trace_analyzing then setnodecolor(current,"font:fina") end - first, last = nil, nil - elseif char >= 0x0600 and char <= 0x06FF then - if trace_analyzing then setnodecolor(current,"font:rest") end - first, last = finish(first,last) - else --no - first, last = finish(first,last) - end - else - first, last = finish(first,last) - end - current = current.next - end - first, last = finish(first,last) - return head, done -end - -methods.syrc = methods.arab - -directives.register("otf.analyze.useunicodemarks",function(v) - analyzers.useunicodemarks = v -end) - end -- closure do -- begin closure to overcome local limits and interference @@ -12185,6 +12562,8 @@ if not modules then modules = { } end modules ['font-def'] = { license = "see context related readme files" } +-- We can overload some of the definers.functions so we don't local them. + local concat = table.concat local format, gmatch, match, find, lower, gsub = string.format, string.gmatch, string.match, string.find, string.lower, string.gsub local tostring, next = tostring, next @@ -12221,7 +12600,6 @@ definers.methods = definers.methods or { } local internalized = allocate() -- internal tex numbers (private) - local loadedfonts = constructors.loadedfonts local designsizes = constructors.designsizes @@ -12251,7 +12629,7 @@ and prepares a table that will move along as we proceed.

-- name name(sub) name(sub)*spec name*spec -- name@spec*oeps -local splitter, splitspecifiers = nil, "" +local splitter, splitspecifiers = nil, "" -- not so nice local P, C, S, Cc = lpeg.P, lpeg.C, lpeg.S, lpeg.Cc @@ -12262,7 +12640,7 @@ local space = P(" ") definers.defaultlookup = "file" -local prefixpattern = P(false) +local prefixpattern = P(false) local function addspecifier(symbol) splitspecifiers = splitspecifiers .. symbol @@ -12298,12 +12676,12 @@ function definers.registersplit(symbol,action,verbosename) end end -function definers.makespecification(specification,lookup,name,sub,method,detail,size) +local function makespecification(specification,lookup,name,sub,method,detail,size) size = size or 655360 if trace_defining then report_defining("%s -> lookup: %s, name: %s, sub: %s, method: %s, detail: %s", - specification, (lookup ~= "" and lookup) or "[file]", (name ~= "" and name) or "-", - (sub ~= "" and sub) or "-", (method ~= "" and method) or "-", (detail ~= "" and detail) or "-") + specification, lookup ~= "" and lookup or "[file]", name ~= "" and name or "-", + sub ~= "" and sub or "-", method ~= "" and method or "-", detail ~= "" and detail or "-") end if not lookup or lookup == "" then lookup = definers.defaultlookup @@ -12323,10 +12701,13 @@ function definers.makespecification(specification,lookup,name,sub,method,detail, return t end + +definers.makespecification = makespecification + function definers.analyze(specification, size) -- can be optimized with locals local lookup, name, sub, method, detail = getspecification(specification or "") - return definers.makespecification(specification, lookup, name, sub, method, detail, size) + return makespecification(specification, lookup, name, sub, method, detail, size) end --[[ldx-- @@ -12373,7 +12754,7 @@ function resolvers.spec(specification) if resolved then specification.resolved = resolved specification.sub = sub - specification.forced = file.extname(resolved) + specification.forced = file.suffix(resolved) specification.name = file.removesuffix(resolved) end else @@ -12421,12 +12802,13 @@ specification yet.

function definers.applypostprocessors(tfmdata) local postprocessors = tfmdata.postprocessors if postprocessors then + local properties = tfmdata.properties for i=1,#postprocessors do local extrahash = postprocessors[i](tfmdata) -- after scaling etc if type(extrahash) == "string" and extrahash ~= "" then -- e.g. a reencoding needs this extrahash = gsub(lower(extrahash),"[^a-z]","-") - tfmdata.properties.fullname = format("%s-%s",tfmdata.properties.fullname,extrahash) + properties.fullname = format("%s-%s",properties.fullname,extrahash) end end end @@ -12744,18 +13126,14 @@ local otffeatures = fonts.constructors.newfeatures("otf") local function initializeitlc(tfmdata,value) if value then - -- the magic 40 and it formula come from Dohyun Kim - local parameters = tfmdata.parameters + -- the magic 40 and it formula come from Dohyun Kim but we might need another guess + local parameters = tfmdata.parameters local italicangle = parameters.italicangle if italicangle and italicangle ~= 0 then - local uwidth = (parameters.uwidth or 40)/2 - for unicode, d in next, tfmdata.descriptions do - local it = d.boundingbox[3] - d.width + uwidth - if it ~= 0 then - d.italic = it - end - end - tfmdata.properties.hasitalics = true + local properties = tfmdata.properties + local factor = tonumber(value) or 1 + properties.hasitalics = true + properties.autoitalicamount = factor * (parameters.uwidth or 40)/2 end end end diff --git a/tex/generic/context/luatex/luatex-fonts.lua b/tex/generic/context/luatex/luatex-fonts.lua index f5045a4e3..535519db7 100644 --- a/tex/generic/context/luatex/luatex-fonts.lua +++ b/tex/generic/context/luatex/luatex-fonts.lua @@ -176,9 +176,9 @@ else loadmodule('font-otf.lua') loadmodule('font-otb.lua') loadmodule('node-inj.lua') -- will be replaced (luatex >= .70) + loadmodule('font-ota.lua') loadmodule('font-otn.lua') -- loadmodule('luatex-fonts-chr.lua') - loadmodule('font-ota.lua') loadmodule('luatex-fonts-lua.lua') loadmodule('font-def.lua') loadmodule('luatex-fonts-def.lua') diff --git a/tex/generic/context/luatex/luatex-mplib.tex b/tex/generic/context/luatex/luatex-mplib.tex index ef6dfff95..8af9f2d8a 100644 --- a/tex/generic/context/luatex/luatex-mplib.tex +++ b/tex/generic/context/luatex/luatex-mplib.tex @@ -31,8 +31,15 @@ \def\setmplibformat#1{\def\mplibformat{#1}} \def\setupmplibcatcodes - {\catcode`\{=12 \catcode`\}=12 \catcode`\#=12 \catcode`\^=12 \catcode`\~=12 - \catcode`\_=12 \catcode`\%=12 \catcode`\&=12 \catcode`\$=12 } + {\catcode`\{=12 % could be optional .. not really needed + \catcode`\}=12 % could be optional .. not really needed + \catcode`\#=12 + \catcode`\^=12 + \catcode`\~=12 + \catcode`\_=12 + \catcode`\%=12 + \catcode`\&=12 + \catcode`\$=12 } \def\mplibcode {\bgroup -- cgit v1.2.3