summary refs log tree commit diff
path: root/tex/context/base/font-ota.lua
diff options
context:
space:
mode:
Diffstat (limited to 'tex/context/base/font-ota.lua')
-rw-r--r-- tex/context/base/font-ota.lua 427
1 files changed, 204 insertions, 223 deletions
diff --git a/tex/context/base/font-ota.lua b/tex/context/base/font-ota.lua
index 9af5a3347..1f1534870 100644
--- a/tex/context/base/font-ota.lua
+++ b/tex/context/base/font-ota.lua
@@ -1,4 +1,4 @@
-if not modules then modules = { } end modules ['font-ota'] = {
+if not modules then modules = { } end modules ['font-otx'] = {
version = 1.001,
comment = "companion to font-otf.lua (analysing)",
author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
@@ -6,7 +6,7 @@ if not modules then modules = { } end modules ['font-ota'] = {
license = "see context related readme files"
}
--- this might become scrp-*.lua
+-- context only
local type = type
@@ -26,21 +26,35 @@ local methods = allocate()
analyzers.initializers = initializers
analyzers.methods = methods
-analyzers.useunicodemarks = false
+---------.useunicodemarks = false
local a_state = attributes.private('state')
+local nuts = nodes.nuts
+local tonut = nuts.tonut
+
+local getfield = nuts.getfield
+local getnext = nuts.getnext
+local getprev = nuts.getprev
+local getid = nuts.getid
+local getprop = nuts.getprop
+local setprop = nuts.setprop
+local getfont = nuts.getfont
+local getsubtype = nuts.getsubtype
+local getchar = nuts.getchar
+
+local traverse_id = nuts.traverse_id
+local traverse_node_list = nuts.traverse
+local end_of_math = nuts.end_of_math
+
local nodecodes = nodes.nodecodes
local glyph_code = nodecodes.glyph
local disc_code = nodecodes.disc
local math_code = nodecodes.math
-local traverse_id = node.traverse_id
-local traverse_node_list = node.traverse
-local end_of_math = node.end_of_math
-
local fontdata = fonts.hashes.identifiers
local categories = characters and characters.categories or { } -- sorry, only in context
+local chardata = characters and characters.data
local otffeatures = fonts.constructors.newfeatures("otf")
local registerotffeature = otffeatures.register
@@ -87,63 +101,70 @@ local features = {
pstf = s_pstf,
}
-analyzers.states = states
-analyzers.features = features
+analyzers.states = states
+analyzers.features = features
+analyzers.useunicodemarks = false
-- todo: analyzers per script/lang, cross font, so we need an font id hash -> script
-- e.g. latin -> hyphenate, arab -> 1/2/3 analyze -- its own namespace
-function analyzers.setstate(head,font)
+function analyzers.setstate(head,font) -- we can skip math
local useunicodemarks = analyzers.useunicodemarks
local tfmdata = fontdata[font]
local descriptions = tfmdata.descriptions
local first, last, current, n, done = nil, nil, head, 0, false -- maybe make n boolean
+ current = tonut(current)
while current do
- local id = current.id
- if id == glyph_code and current.font == font then
+ local id = getid(current)
+ if id == glyph_code and getfont(current) == font then
done = true
- local char = current.char
+ local char = getchar(current)
local d = descriptions[char]
if d then
- if d.class == "mark" or (useunicodemarks and categories[char] == "mn") then
+ if d.class == "mark" then
done = true
- current[a_state] = s_mark
+ setprop(current,a_state,s_mark)
+ elseif useunicodemarks and categories[char] == "mn" then
+ done = true
+ setprop(current,a_state,s_mark)
elseif n == 0 then
first, last, n = current, current, 1
- current[a_state] = s_init
+ setprop(current,a_state,s_init)
else
last, n = current, n+1
- current[a_state] = s_medi
+ setprop(current,a_state,s_medi)
end
else -- finish
if first and first == last then
- last[a_state] = s_isol
+ setprop(last,a_state,s_isol)
elseif last then
- last[a_state] = s_fina
+ setprop(last,a_state,s_fina)
end
first, last, n = nil, nil, 0
end
elseif id == disc_code then
- -- always in the middle
- current[a_state] = s_medi
+ -- always in the middle .. it doesn't make much sense to assign a property
+ -- here ... we might at some point decide to flag the components when present
+ -- but even then it's kind of bogus
+ setprop(current,a_state,s_medi)
last = current
else -- finish
if first and first == last then
- last[a_state] = s_isol
+ setprop(last,a_state,s_isol)
elseif last then
- last[a_state] = s_fina
+ setprop(last,a_state,s_fina)
end
first, last, n = nil, nil, 0
if id == math_code then
current = end_of_math(current)
end
end
- current = current.next
+ current = getnext(current)
end
if first and first == last then
- last[a_state] = s_isol
+ setprop(last,a_state,s_isol)
elseif last then
- last[a_state] = s_fina
+ setprop(last,a_state,s_fina)
end
return head, done
end
@@ -200,234 +221,194 @@ registerotffeature {
-- latin
methods.latn = analyzers.setstate
-
--- This info eventually can go into char-def and we will have a state
--- table for generic then (unicode recognized all states but in practice
--- only has only
---
--- isolated : isol
--- final : isol_fina
--- medial : isol_fina_medi_init
---
--- so in practice, without analyzer it's rather useless info which is
--- why having it in char-def makes only sense for special purposes (like)
--- like tracing cq. visualizing.
-
-local tatweel = 0x0640
-local zwnj = 0x200C
-local zwj = 0x200D
-
-local isolated = { -- isol
- [0x0600] = true, [0x0601] = true, [0x0602] = true, [0x0603] = true,
- [0x0604] = true,
- [0x0608] = true, [0x060B] = true, [0x0621] = true, [0x0674] = true,
- [0x06DD] = true,
- -- mandaic
- [0x0856] = true, [0x0858] = true, [0x0857] = true,
- -- n'ko
- [0x07FA] = true,
- -- also here:
- [zwnj] = true,
-}
-
-local final = { -- isol_fina
- [0x0622] = true, [0x0623] = true, [0x0624] = true, [0x0625] = true,
- [0x0627] = true, [0x0629] = true, [0x062F] = true, [0x0630] = true,
- [0x0631] = true, [0x0632] = true, [0x0648] = true, [0x0671] = true,
- [0x0672] = true, [0x0673] = true, [0x0675] = true, [0x0676] = true,
- [0x0677] = true, [0x0688] = true, [0x0689] = true, [0x068A] = true,
- [0x068B] = true, [0x068C] = true, [0x068D] = true, [0x068E] = true,
- [0x068F] = true, [0x0690] = true, [0x0691] = true, [0x0692] = true,
- [0x0693] = true, [0x0694] = true, [0x0695] = true, [0x0696] = true,
- [0x0697] = true, [0x0698] = true, [0x0699] = true, [0x06C0] = true,
- [0x06C3] = true, [0x06C4] = true, [0x06C5] = true, [0x06C6] = true,
- [0x06C7] = true, [0x06C8] = true, [0x06C9] = true, [0x06CA] = true,
- [0x06CB] = true, [0x06CD] = true, [0x06CF] = true, [0x06D2] = true,
- [0x06D3] = true, [0x06D5] = true, [0x06EE] = true, [0x06EF] = true,
- [0x0759] = true, [0x075A] = true, [0x075B] = true, [0x076B] = true,
- [0x076C] = true, [0x0771] = true, [0x0773] = true, [0x0774] = true,
- [0x0778] = true, [0x0779] = true,
- [0x08AA] = true, [0x08AB] = true, [0x08AC] = true,
- [0xFEF5] = true, [0xFEF7] = true, [0xFEF9] = true, [0xFEFB] = true,
- -- syriac
- [0x0710] = true, [0x0715] = true, [0x0716] = true, [0x0717] = true,
- [0x0718] = true, [0x0719] = true, [0x0728] = true, [0x072A] = true,
- [0x072C] = true, [0x071E] = true,
- [0x072F] = true, [0x074D] = true,
- -- mandaic
- [0x0840] = true, [0x0849] = true, [0x0854] = true, [0x0846] = true,
- [0x084F] = true
-}
-
-local medial = { -- isol_fina_medi_init
- [0x0626] = true, [0x0628] = true, [0x062A] = true, [0x062B] = true,
- [0x062C] = true, [0x062D] = true, [0x062E] = true, [0x0633] = true,
- [0x0634] = true, [0x0635] = true, [0x0636] = true, [0x0637] = true,
- [0x0638] = true, [0x0639] = true, [0x063A] = true, [0x063B] = true,
- [0x063C] = true, [0x063D] = true, [0x063E] = true, [0x063F] = true,
- [0x0641] = true, [0x0642] = true, [0x0643] = true,
- [0x0644] = true, [0x0645] = true, [0x0646] = true, [0x0647] = true,
- [0x0649] = true, [0x064A] = true, [0x066E] = true, [0x066F] = true,
- [0x0678] = true, [0x0679] = true, [0x067A] = true, [0x067B] = true,
- [0x067C] = true, [0x067D] = true, [0x067E] = true, [0x067F] = true,
- [0x0680] = true, [0x0681] = true, [0x0682] = true, [0x0683] = true,
- [0x0684] = true, [0x0685] = true, [0x0686] = true, [0x0687] = true,
- [0x069A] = true, [0x069B] = true, [0x069C] = true, [0x069D] = true,
- [0x069E] = true, [0x069F] = true, [0x06A0] = true, [0x06A1] = true,
- [0x06A2] = true, [0x06A3] = true, [0x06A4] = true, [0x06A5] = true,
- [0x06A6] = true, [0x06A7] = true, [0x06A8] = true, [0x06A9] = true,
- [0x06AA] = true, [0x06AB] = true, [0x06AC] = true, [0x06AD] = true,
- [0x06AE] = true, [0x06AF] = true, [0x06B0] = true, [0x06B1] = true,
- [0x06B2] = true, [0x06B3] = true, [0x06B4] = true, [0x06B5] = true,
- [0x06B6] = true, [0x06B7] = true, [0x06B8] = true, [0x06B9] = true,
- [0x06BA] = true, [0x06BB] = true, [0x06BC] = true, [0x06BD] = true,
- [0x06BE] = true, [0x06BF] = true, [0x06C1] = true, [0x06C2] = true,
- [0x06CC] = true, [0x06CE] = true, [0x06D0] = true, [0x06D1] = true,
- [0x06FA] = true, [0x06FB] = true, [0x06FC] = true, [0x06FF] = true,
- [0x0750] = true, [0x0751] = true, [0x0752] = true, [0x0753] = true,
- [0x0754] = true, [0x0755] = true, [0x0756] = true, [0x0757] = true,
- [0x0758] = true, [0x075C] = true, [0x075D] = true, [0x075E] = true,
- [0x075F] = true, [0x0760] = true, [0x0761] = true, [0x0762] = true,
- [0x0763] = true, [0x0764] = true, [0x0765] = true, [0x0766] = true,
- [0x0767] = true, [0x0768] = true, [0x0769] = true, [0x076A] = true,
- [0x076D] = true, [0x076E] = true, [0x076F] = true, [0x0770] = true,
- [0x0772] = true, [0x0775] = true, [0x0776] = true, [0x0777] = true,
- [0x077A] = true, [0x077B] = true, [0x077C] = true, [0x077D] = true,
- [0x077E] = true, [0x077F] = true,
- [0x08A0] = true, [0x08A2] = true, [0x08A4] = true, [0x08A5] = true,
- [0x08A6] = true, [0x0620] = true, [0x08A8] = true, [0x08A9] = true,
- [0x08A7] = true, [0x08A3] = true,
- -- syriac
- [0x0712] = true, [0x0713] = true, [0x0714] = true, [0x071A] = true,
- [0x071B] = true, [0x071C] = true, [0x071D] = true, [0x071F] = true,
- [0x0720] = true, [0x0721] = true, [0x0722] = true, [0x0723] = true,
- [0x0724] = true, [0x0725] = true, [0x0726] = true, [0x0727] = true,
- [0x0729] = true, [0x072B] = true, [0x072D] = true, [0x072E] = true,
- [0x074E] = true, [0x074F] = true,
- -- mandaic
- [0x0841] = true, [0x0842] = true, [0x0843] = true, [0x0844] = true,
- [0x0845] = true, [0x0847] = true, [0x0848] = true, [0x0855] = true,
- [0x0851] = true, [0x084E] = true, [0x084D] = true, [0x084A] = true,
- [0x084B] = true, [0x084C] = true, [0x0850] = true, [0x0852] = true,
- [0x0853] = true,
- -- n'ko
- [0x07D7] = true, [0x07E8] = true, [0x07D9] = true, [0x07EA] = true,
- [0x07CA] = true, [0x07DB] = true, [0x07CC] = true, [0x07DD] = true,
- [0x07CE] = true, [0x07DF] = true, [0x07D4] = true, [0x07E5] = true,
- [0x07E9] = true, [0x07E7] = true, [0x07E3] = true, [0x07E2] = true,
- [0x07E0] = true, [0x07E1] = true, [0x07DE] = true, [0x07DC] = true,
- [0x07D1] = true, [0x07DA] = true, [0x07D8] = true, [0x07D6] = true,
- [0x07D2] = true, [0x07D0] = true, [0x07CF] = true, [0x07CD] = true,
- [0x07CB] = true, [0x07D3] = true, [0x07E4] = true, [0x07D5] = true,
- [0x07E6] = true,
- -- also here:
- [tatweel]= true,
- [zwj] = true,
-}
+-------.dflt = analyzers.setstate % can be an option or just the default
local arab_warned = { }
--- todo: gref
-
local function warning(current,what)
- local char = current.char
+ local char = getchar(current)
if not arab_warned[char] then
log.report("analyze","arab: character %C has no %a class",char,what)
arab_warned[char] = true
end
end
--- potential optimization: local medial_final = table.merged(medial,final)
+local mappers = { -- maps the "arabic" field of a character's data entry onto an analyzer state (looked up by the classifiers table)
+ l = s_init, -- left
+ d = s_medi, -- double
+ c = s_medi, -- joiner (treated the same as double)
+ r = s_fina, -- right
+ u = s_isol, -- nonjoiner
+}
-local function finish(first,last)
- if last then
- if first == last then
- local fc = first.char
- if medial[fc] or final[fc] then
- first[a_state] = s_isol
- else
- warning(first,"isol")
- first[a_state] = s_error
+local classifiers = { } -- we can also use this trick for devanagari
+
+local first_arabic, last_arabic = characters.blockrange("arabic") -- range bounds tested in the classifiers fallback when a character has no "arabic" field
+local first_syriac, last_syriac = characters.blockrange("syriac")
+local first_mandiac, last_mandiac = characters.blockrange("mandaic") -- the block is named "mandaic"; the locals keep their spelling because the classifier test refers to them
+local first_nko, last_nko = characters.blockrange("nko")
+
+table.setmetatableindex(classifiers,function(t,k)
+ local c = chardata[k]
+ local v = false
+ if c then
+ local arabic = c.arabic
+ if arabic then
+ v = mappers[arabic]
+ if not v then
+ log.report("analyze","error in mapping arabic %C",k)
+ -- error
+ v = false
end
- else
- local lc = last.char
- if medial[lc] or final[lc] then
- -- if laststate == 1 or laststate == 2 or laststate == 4 then
- last[a_state] = s_fina
+ elseif k >= first_arabic and k <= last_arabic or k >= first_syriac and k <= last_syriac or
+ k >= first_mandiac and k <= last_mandiac or k >= first_nko and k <= last_nko then
+ if categories[k] == "mn" then
+ v = s_mark
else
- warning(last,"fina")
- last[a_state] = s_error
+ v = s_rest
end
- end
- first, last = nil, nil
- elseif first then
- -- first and last are either both set so we never com here
- local fc = first.char
- if medial[fc] or final[fc] then
- first[a_state] = s_isol
else
- warning(first,"isol")
- first[a_state] = s_error
end
- first = nil
end
- return first, last
-end
+ t[k] = v
+ return v
+end)
function methods.arab(head,font,attr)
- local useunicodemarks = analyzers.useunicodemarks
- local tfmdata = fontdata[font]
- local marks = tfmdata.resources.marks
- local first, last, current, done = nil, nil, head, false
+ local first, last = nil, nil
+ local c_first, c_last = nil, nil
+ local current, done = head, false
+ current = tonut(current)
while current do
- local id = current.id
- if id == glyph_code and current.font == font and current.subtype<256 and not current[a_state] then
+ local id = getid(current)
+ if id == glyph_code and getfont(current) == font and getsubtype(current)<256 and not getprop(current,a_state) then
done = true
- local char = current.char
- if marks[char] or (useunicodemarks and categories[char] == "mn") then
- current[a_state] = s_mark
- elseif isolated[char] then -- can be zwj or zwnj too
- first, last = finish(first,last)
- current[a_state] = s_isol
- first, last = nil, nil
- elseif not first then
- if medial[char] then
- current[a_state] = s_init
- first, last = first or current, current
- elseif final[char] then
- current[a_state] = s_isol
+ local char = getchar(current)
+ local classifier = classifiers[char]
+ if not classifier then
+ if last then
+ if c_last == s_medi or c_last == s_fina then
+ setprop(last,a_state,s_fina)
+ else
+ warning(last,"fina")
+ setprop(last,a_state,s_error)
+ end
first, last = nil, nil
- else -- no arab
- first, last = finish(first,last)
+ elseif first then
+ if c_first == s_medi or c_first == s_fina then
+ setprop(first,a_state,s_isol)
+ else
+ warning(first,"isol")
+ setprop(first,a_state,s_error)
+ end
+ first = nil
end
- elseif medial[char] then
- first, last = first or current, current
- current[a_state] = s_medi
- elseif final[char] then
- if not last[a_state] == s_init then
- -- tricky, we need to check what last may be !
- last[a_state] = s_medi
+ elseif classifier == s_mark then
+ setprop(current,a_state,s_mark)
+ elseif classifier == s_isol then
+ if last then
+ if c_last == s_medi or c_last == s_fina then
+ setprop(last,a_state,s_fina)
+ else
+ warning(last,"fina")
+ setprop(last,a_state,s_error)
+ end
+ first, last = nil, nil
+ elseif first then
+ if c_first == s_medi or c_first == s_fina then
+ setprop(first,a_state,s_isol)
+ else
+ warning(first,"isol")
+ setprop(first,a_state,s_error)
+ end
+ first = nil
+ end
+ setprop(current,a_state,s_isol)
+ elseif classifier == s_medi then
+ if first then
+ last = current
+ c_last = classifier
+ setprop(current,a_state,s_medi)
+ else
+ setprop(current,a_state,s_init)
+ first = current
+ c_first = classifier
+ end
+ elseif classifier == s_fina then
+ if last then
+ if getprop(last,a_state) ~= s_init then
+ setprop(last,a_state,s_medi)
+ end
+ setprop(current,a_state,s_fina)
+ first, last = nil, nil
+ elseif first then
+ -- if getprop(first,a_state) ~= s_init then
+ -- -- needs checking
+ -- setprop(first,a_state,s_medi)
+ -- end
+ setprop(current,a_state,s_fina)
+ first = nil
+ else
+ setprop(current,a_state,s_isol)
+ end
+ else -- classifier == s_rest
+ setprop(current,a_state,s_rest)
+ if last then
+ if c_last == s_medi or c_last == s_fina then
+ setprop(last,a_state,s_fina)
+ else
+ warning(last,"fina")
+ setprop(last,a_state,s_error)
+ end
+ first, last = nil, nil
+ elseif first then
+ if c_first == s_medi or c_first == s_fina then
+ setprop(first,a_state,s_isol)
+ else
+ warning(first,"isol")
+ setprop(first,a_state,s_error)
+ end
+ first = nil
end
- current[a_state] = s_fina
- first, last = nil, nil
- elseif char >= 0x0600 and char <= 0x06FF then -- needs checking
- current[a_state] = s_rest
- first, last = finish(first,last)
- else -- no
- first, last = finish(first,last)
end
else
- if first or last then
- first, last = finish(first,last)
+ if last then
+ if c_last == s_medi or c_last == s_fina then
+ setprop(last,a_state,s_fina)
+ else
+ warning(last,"fina")
+ setprop(last,a_state,s_error)
+ end
+ first, last = nil, nil
+ elseif first then
+ if c_first == s_medi or c_first == s_fina then
+ setprop(first,a_state,s_isol)
+ else
+ warning(first,"isol")
+ setprop(first,a_state,s_error)
+ end
+ first = nil
end
- if id == math_code then
+ if id == math_code then -- a bit duplicate as we test for glyphs twice
current = end_of_math(current)
end
end
- current = current.next
+ current = getnext(current)
end
- if first or last then
- finish(first,last)
+ if last then
+ if c_last == s_medi or c_last == s_fina then
+ setprop(last,a_state,s_fina)
+ else
+ warning(last,"fina")
+ setprop(last,a_state,s_error)
+ end
+ elseif first then
+ if c_first == s_medi or c_first == s_fina then
+ setprop(first,a_state,s_isol)
+ else
+ warning(first,"isol")
+ setprop(first,a_state,s_error)
+ end
end
return head, done
end