diff options
Diffstat (limited to 'tex/context/base/scrp-ini.lua')
-rw-r--r-- | tex/context/base/scrp-ini.lua | 1268 |
1 files changed, 634 insertions, 634 deletions
diff --git a/tex/context/base/scrp-ini.lua b/tex/context/base/scrp-ini.lua index fbe673db9..18f86475f 100644 --- a/tex/context/base/scrp-ini.lua +++ b/tex/context/base/scrp-ini.lua @@ -1,634 +1,634 @@ -if not modules then modules = { } end modules ['scrp-ini'] = {
- version = 1.001,
- comment = "companion to scrp-ini.mkiv",
- author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
- copyright = "PRAGMA ADE / ConTeXt Development Team",
- license = "see context related readme files"
-}
-
--- We need to rewrite this a bit ... rather old code ... will be done when japanese
--- is finished.
-
-local attributes, nodes, node = attributes, nodes, node
-
-local trace_analyzing = false trackers.register("scripts.analyzing", function(v) trace_analyzing = v end)
-local trace_injections = false trackers.register("scripts.injections", function(v) trace_injections = v end)
-
-local report_preprocessing = logs.reporter("scripts","preprocessing")
-
-local utfchar = utf.char
-
-local first_glyph = node.first_glyph or node.first_character
-local traverse_id = node.traverse_id
-
-local texsetattribute = tex.setattribute
-
-local nodecodes = nodes.nodecodes
-local unsetvalue = attributes.unsetvalue
-
-local glyph_code = nodecodes.glyph
-local glue_code = nodecodes.glue
-
-local a_scriptinjection = attributes.private('scriptinjection')
-local a_scriptsplitting = attributes.private('scriptsplitting')
-local a_scriptstatus = attributes.private('scriptstatus')
-
-local fontdata = fonts.hashes.identifiers
-local allocate = utilities.storage.allocate
-local setnodecolor = nodes.tracers.colors.set
-local setmetatableindex = table.setmetatableindex
-
-local enableaction = nodes.tasks.enableaction
-local disableaction = nodes.tasks.disableaction
-
-- The global scripts namespace; it is reused when it already exists.

scripts           = scripts or { }
local scripts     = scripts

-- Only a placeholder: the character class table is defined as a fresh local
-- further down in this file and assigned to scripts.hash there.

scripts.hash      = scripts.hash or { }
local hash        = scripts.hash

-- Script handlers, indexed by method name (see scripts.installmethod).

local handlers    = allocate()
scripts.handlers  = handlers

-- The node list callbacks get their own tables. The injectors table used to
-- be an alias for handlers, so that scripts.injectors.handler showed up as a
-- bogus method called "handler" in scripts.handlers.

local injectors   = allocate()
scripts.injectors = injectors

local splitters   = allocate()
scripts.splitters = splitters
-
-- Character classes for the code points that need special treatment. Code
-- points that are not listed here are not classified by this table itself.
--
-- The tail of the non starter list used to be a mix of 0x30Fx and 0x31Fx
-- entries (with 0x30FC, 0x30FD and 0x30FE listed twice); it followed the
-- 0x31F0 .. 0x31FF sequence, so the stray 0x30F2-0x30F4 and 0x30F7-0x30F9
-- entries (full size katakana) have been replaced by the small kana of the
-- katakana phonetic extensions block that they stood for.

local hash = { -- we could put these presets in char-def.lua
    --
    -- half width opening parenthesis
    --
    [0x0028] = "half_width_open",
    [0x005B] = "half_width_open",
    [0x007B] = "half_width_open",
    [0x2018] = "half_width_open", -- ‘
    [0x201C] = "half_width_open", -- “
    --
    -- full width opening parenthesis
    --
    [0x3008] = "full_width_open", -- 〈 Left book quote
    [0x300A] = "full_width_open", -- 《 Left double book quote
    [0x300C] = "full_width_open", -- 「 left quote
    [0x300E] = "full_width_open", -- 『 left double quote
    [0x3010] = "full_width_open", -- 【 left double book quote
    [0x3014] = "full_width_open", -- 〔 left book quote
    [0x3016] = "full_width_open", -- 〖 left double book quote
    [0x3018] = "full_width_open", -- left tortoise bracket
    [0x301A] = "full_width_open", -- left square bracket
    [0x301D] = "full_width_open", -- reverse double prime qm
    [0xFF08] = "full_width_open", -- ( left parenthesis
    [0xFF3B] = "full_width_open", -- [ left square brackets
    [0xFF5B] = "full_width_open", -- { left curve bracket
    --
    -- half width closing parenthesis
    --
    [0x0029] = "half_width_close",
    [0x005D] = "half_width_close",
    [0x007D] = "half_width_close",
    [0x2019] = "half_width_close", -- ’ right quote, right
    [0x201D] = "half_width_close", -- ” right double quote
    --
    -- full width closing parenthesis
    --
    [0x3009] = "full_width_close", -- 〉 book quote
    [0x300B] = "full_width_close", -- 》 double book quote
    [0x300D] = "full_width_close", -- 」 right quote, right
    [0x300F] = "full_width_close", -- 』 right double quote
    [0x3011] = "full_width_close", -- 】 right double book quote
    [0x3015] = "full_width_close", -- 〕 right book quote
    [0x3017] = "full_width_close", -- 〗 right double book quote
    [0x3019] = "full_width_close", -- right tortoise bracket
    [0x301B] = "full_width_close", -- right square bracket
    [0x301E] = "full_width_close", -- double prime qm
    [0x301F] = "full_width_close", -- low double prime qm
    [0xFF09] = "full_width_close", -- ) right parenthesis
    [0xFF3D] = "full_width_close", -- ] right square brackets
    [0xFF5D] = "full_width_close", -- } right curve brackets
    --
    [0xFF62] = "half_width_open",  -- left corner bracket
    [0xFF63] = "half_width_close", -- right corner bracket
    --
    -- vertical opening
    --
    -- 0xFE35, 0xFE37, 0xFE39, 0xFE3B, 0xFE3D, 0xFE3F, 0xFE41, 0xFE43, 0xFE47,
    --
    -- vertical closing
    --
    -- 0xFE36, 0xFE38, 0xFE3A, 0xFE3C, 0xFE3E, 0xFE40, 0xFE42, 0xFE44, 0xFE48,
    --
    -- half width opening punctuation
    --
    -- <empty>
    --
    -- full width opening punctuation
    --
    -- 0x2236, -- ∶
    -- 0xFF0C, -- ,
    --
    -- half width closing punctuation_hw
    --
    [0x0021] = "half_width_close", -- !
    [0x002C] = "half_width_close", -- ,
    [0x002E] = "half_width_close", -- .
    [0x003A] = "half_width_close", -- :
    [0x003B] = "half_width_close", -- ;
    [0x003F] = "half_width_close", -- ?
    [0xFF61] = "half_width_close", -- hw full stop
    --
    -- full width closing punctuation
    --
    [0x3001] = "full_width_close", -- 、
    [0x3002] = "full_width_close", -- 。
    [0xFF0C] = "full_width_close", -- ,
    [0xFF0E] = "full_width_close", -- fullwidth full stop
    --
    -- depends on font
    --
    [0xFF01] = "full_width_close", -- !
    [0xFF1F] = "full_width_close", -- ?
    --
    [0xFF1A] = "full_width_punct", -- :
    [0xFF1B] = "full_width_punct", -- ;
    --
    -- non starter
    --
    [0x3005] = "non_starter", [0x3041] = "non_starter", [0x3043] = "non_starter", [0x3045] = "non_starter", [0x3047] = "non_starter",
    [0x3049] = "non_starter", [0x3063] = "non_starter", [0x3083] = "non_starter", [0x3085] = "non_starter", [0x3087] = "non_starter",
    [0x308E] = "non_starter", [0x3095] = "non_starter", [0x3096] = "non_starter", [0x309B] = "non_starter", [0x309C] = "non_starter",
    [0x309D] = "non_starter", [0x309E] = "non_starter", [0x30A0] = "non_starter", [0x30A1] = "non_starter", [0x30A3] = "non_starter",
    [0x30A5] = "non_starter", [0x30A7] = "non_starter", [0x30A9] = "non_starter", [0x30C3] = "non_starter", [0x30E3] = "non_starter",
    [0x30E5] = "non_starter", [0x30E7] = "non_starter", [0x30EE] = "non_starter", [0x30F5] = "non_starter", [0x30F6] = "non_starter",
    [0x30FC] = "non_starter", [0x30FD] = "non_starter", [0x30FE] = "non_starter",
    --
    -- non starter: katakana phonetic extensions, the whole 0x31F0 .. 0x31FF range
    --
    [0x31F0] = "non_starter", [0x31F1] = "non_starter", [0x31F2] = "non_starter", [0x31F3] = "non_starter", [0x31F4] = "non_starter",
    [0x31F5] = "non_starter", [0x31F6] = "non_starter", [0x31F7] = "non_starter", [0x31F8] = "non_starter", [0x31F9] = "non_starter",
    [0x31FA] = "non_starter", [0x31FB] = "non_starter", [0x31FC] = "non_starter", [0x31FD] = "non_starter", [0x31FE] = "non_starter",
    [0x31FF] = "non_starter",
    --
    -- hyphenation
    --
    [0x2026] = "hyphen", -- … ellipsis
    [0x2014] = "hyphen", -- — em dash
    --
    [0x1361] = "ethiopic_word",
    [0x1362] = "ethiopic_sentence",
    --
}
-
-- Fallback classifier, meant to be used as __index function: it determines
-- the class of code point k by range, caches the outcome in t and returns it.
-- The result is a class name or false when k is not a number or falls outside
-- all ranges. Testing the type (instead of tonumber) prevents a numeric string
-- from reaching the comparisons, where it would raise an error, and a nil key
-- is not cached because it can't be stored in a table.

local function provide(t,k)
    local v = false
    if type(k) == "number" then
        if     (k >= 0x03040 and k <= 0x030FF)
            or (k >= 0x031F0 and k <= 0x031FF)
            or (k >= 0x032D0 and k <= 0x032FE)
            or (k >= 0x0FF00 and k <= 0x0FFEF) then v = "katakana"
        elseif (k >= 0x03400 and k <= 0x04DFF)
            or (k >= 0x04E00 and k <= 0x09FFF)
            or (k >= 0x0F900 and k <= 0x0FAFF)
            or (k >= 0x20000 and k <= 0x2A6DF)
            or (k >= 0x2F800 and k <= 0x2FA1F) then v = "chinese"
        elseif (k >= 0x0AC00 and k <= 0x0D7A3) then v = "korean"
        elseif (k >= 0x01100 and k <= 0x0115F) then v = "jamo_initial"
        elseif (k >= 0x01160 and k <= 0x011A7) then v = "jamo_medial"
        elseif (k >= 0x011A8 and k <= 0x011FF) then v = "jamo_final"
        elseif (k >= 0x01200 and k <= 0x0139F) then v = "ethiopic_syllable"
        end
    end
    if k ~= nil then
        t[k] = v
    end
    return v
end
-
-- code points that are not preset in the hash are classified on demand

setmetatableindex(hash,provide)

scripts.hash = hash

-- both tables are indexed by attribute value and get filled when a preset of
-- an installed method is accessed for the first time

local numbertodataset = allocate()
local numbertohandler = allocate()

--~ storage.register("scripts/hash", hash, "scripts.hash")

scripts.numbertodataset = numbertodataset
scripts.numbertohandler = numbertohandler

-- fallback values for datasets (inter_char_shrink_factor was listed twice)

local defaults = {
    inter_char_shrink_factor          = 0,
    inter_char_stretch_factor         = 0,
    inter_char_half_shrink_factor     = 0,
    inter_char_half_stretch_factor    = 0,
    inter_char_quarter_shrink_factor  = 0,
    inter_char_quarter_stretch_factor = 0,
    inter_char_hangul_penalty         = 0,

    inter_word_stretch_factor         = 0,
}

scripts.defaults = defaults -- so we can add more
-
-- Registers a script handler under handler.name. All its datasets fall back
-- on the shared defaults. The handler gets an attributes table that maps a
-- preset name onto an attribute number: the number is allocated on first
-- access and recorded in numbertodataset and numbertohandler; an unknown
-- preset name resolves to the default dataset.
--
-- A handler without datasets table used to trigger an error right after the
-- report; now an empty table (with an empty default) is created instead.

function scripts.installmethod(handler)
    local name     = handler.name
    local datasets = handler.datasets
    handlers[name] = handler
    if not datasets then
        report_preprocessing("missing (default) dataset in script %a",name)
        datasets = { default = { } }
        handler.datasets = datasets
    elseif not datasets.default then
        report_preprocessing("missing (default) dataset in script %a",name)
        datasets.default = { } -- slower but an error anyway
    end
    for _, dataset in next, datasets do
        setmetatableindex(dataset,defaults)
    end
    local presets = { } -- not called attributes as that shadows the global namespace
    setmetatable(presets, {
        __index = function(t,k)
            local v = datasets[k] or datasets.default
            local a = unsetvalue
            if v then
                v.name = name -- for tracing
                a = #numbertodataset + 1
                numbertodataset[a] = v
                numbertohandler[a] = handler
            end
            t[k] = a
            return a
        end
    } )
    handler.attributes = presets
end
-
-- Adds (or overloads) a named dataset of an installed method. The
-- specification table has the fields method, name and dataset and an optional
-- parent. A new dataset inherits from the parent dataset when that one is
-- known and from the default dataset of the method otherwise.
--
-- Two things were wrong here: setmetatable(dataset,defaultset) made the
-- defaultset the metatable itself, which gives no inheritance because it has
-- no __index field, and overloading an existing dataset stored the new table
-- under every existing key instead of copying the new values. An existing
-- dataset is still updated in place, so the table that is already known in
-- numbertodataset stays valid.

function scripts.installdataset(specification) -- global overload
    local method  = specification.method
    local name    = specification.name
    local dataset = specification.dataset
    if not (method and name and dataset) then
        report_preprocessing("dataset, invalid specification") -- maybe report table
        return
    end
    local handler = handlers[method]
    if not handler then
        report_preprocessing("dataset, no method %a",method)
        return
    end
    local datasets = handler.datasets
    if not datasets then
        report_preprocessing("dataset, no datasets for method %a",method)
        return
    end
    local defaultset = datasets.default
    if not defaultset then
        report_preprocessing("dataset, no default for method %a",method)
        return
    end
    local parent = specification.parent or ""
    if parent ~= "" then
        local p = datasets[parent]
        if p then
            defaultset = p
        else
            report_preprocessing("dataset, unknown parent %a for method %a",parent,method)
        end
    end
    local existing = datasets[name]
    if existing then
        -- we only overload the given values, the inheritance chain is kept
        for k, v in next, dataset do
            existing[k] = v
        end
    else
        setmetatableindex(dataset,defaultset)
        datasets[name] = dataset
    end
end
-
-- the node list actions are only enabled when a script is actually used

local injectorenabled = false
local splitterenabled = false

-- Activates the given preset of a method by setting the injection and/or
-- splitting attribute, depending on what the handler provides. The first
-- time a handler is used its initializer (if any) is run and then removed.
-- An unknown method resets both attributes. The name argument is not used.

function scripts.set(name,method,preset)
    local handler = handlers[method]
    if not handler then
        texsetattribute(a_scriptinjection,unsetvalue)
        texsetattribute(a_scriptsplitting,unsetvalue)
        return
    end
    if handler.injector then
        if not injectorenabled then
            enableaction("processors","scripts.injectors.handler")
            injectorenabled = true
        end
        local number = handler.attributes[preset]
        texsetattribute(a_scriptinjection,number or unsetvalue)
    end
    if handler.splitter then
        if not splitterenabled then
            enableaction("processors","scripts.splitters.handler")
            splitterenabled = true
        end
        local number = handler.attributes[preset]
        texsetattribute(a_scriptsplitting,number or unsetvalue)
    end
    local initializer = handler.initializer
    if initializer then
        initializer(handler)
        handler.initializer = nil
    end
end

-- Unsets both script attributes.

function scripts.reset()
    texsetattribute(a_scriptinjection,unsetvalue)
    texsetattribute(a_scriptsplitting,unsetvalue)
end
-
--- the following tables will become a proper installer (move to cjk/eth)
---
--- 0=gray 1=red 2=green 3=blue 4=yellow 5=magenta 6=cyan 7=x-yellow 8=x-magenta 9=x-cyan
-
-- category name -> color used when tracing the analysis

local scriptcolors = allocate { -- todo: just named colors
    -- scripts
    korean            = "trace:0",
    chinese           = "trace:0",
    katakana          = "trace:0",
    hiragana          = "trace:0",
    -- parentheses and punctuation
    full_width_open   = "trace:1",
    full_width_close  = "trace:2",
    half_width_open   = "trace:3",
    half_width_close  = "trace:4",
    full_width_punct  = "trace:5",
    hyphen            = "trace:5",
    non_starter       = "trace:6",
    -- hangul jamo
    jamo_initial      = "trace:7",
    jamo_medial       = "trace:8",
    jamo_final        = "trace:9",
    -- ethiopic
    ethiopic_syllable = "trace:1",
    ethiopic_word     = "trace:2",
    ethiopic_sentence = "trace:3",
}

scripts.colors = scriptcolors

-- category number -> category name; the position in this list is the value
-- that ends up in the scriptstatus attribute

local numbertocategory = allocate { -- rather bound to cjk ... will be generalized
    "korean",            --  1
    "chinese",           --  2
    "katakana",          --  3
    "hiragana",          --  4
    "full_width_open",   --  5
    "full_width_close",  --  6
    "half_width_open",   --  7
    "half_width_close",  --  8
    "full_width_punct",  --  9
    "hyphen",            -- 10
    "non_starter",       -- 11
    "jamo_initial",      -- 12
    "jamo_medial",       -- 13
    "jamo_final",        -- 14
    "ethiopic_syllable", -- 15
    "ethiopic_word",     -- 16
    "ethiopic_sentence", -- 17
}

-- the reverse mapping: category name -> category number

local categorytonumber = allocate(table.swapped(numbertocategory)) -- could be one table

scripts.numbertocategory = numbertocategory
scripts.categorytonumber = categorytonumber
-
-- Tracing helper: gives each glyph from start upto and including stop the
-- color that belongs to the category stored in its scriptstatus attribute.
-- Glyphs without a (known) category are left untouched.

local function colorize(start,stop)
    for glyph in traverse_id(glyph_code,start) do
        local category = numbertocategory[glyph[a_scriptstatus]]
        local color    = category and scriptcolors[category]
        if color then
            setnodecolor(glyph,color)
        end
        if glyph == stop then
            return
        end
    end
end
-
-- Tracing variant of calling an injector: the range first..last is reported
-- before and after process has been applied; a is the attribute value that is
-- used to look up the dataset name for the report.
--
-- The test used to compare the undefined global 'start' with last, which is
-- always true. Now 'first' is compared, so a range of just one node is no
-- longer reported, but it still gets processed: tracing should not change the
-- result.

local function traced_process(head,first,last,process,a)
    if first ~= last then
        local dataset = numbertodataset[a]
        local name    = dataset and dataset.name or "?"
        report_preprocessing("before %s: %s",name,nodes.tosequence(first,last))
        process(head,first,last)
        report_preprocessing("after %s: %s", name,nodes.tosequence(first,last))
    else
        process(head,first,last)
    end
end
-
--- eventually we might end up with more extensive parsing
--- todo: pass t[start..stop] == original
---
--- one of the time consuming functions:
-
--- we can have a fonts.hashes.originals
-
-- Hands the collected range first..last over to the injector (process) and
-- takes care of the optional tracing; a is the attribute value of the range.

local function process_range(head,first,last,process,a)
    if trace_analyzing then
        colorize(first,last)
    end
    if trace_injections then
        traced_process(head,first,last,process,a)
    else
        process(head,first,last)
    end
end

-- The node list pass: starting at the first glyph we collect ranges of glyphs
-- that have the same scriptinjection attribute and a class in the hash. Each
-- of these glyphs gets its category number in the scriptstatus attribute.
-- Glue doesn't end a range; an attribute change, a glyph without attribute or
-- class and any other node do, and then the range is passed to the injector
-- of the handler that belongs to the attribute. We return the head and a
-- boolean that tells if an injector has been applied.
--
-- The font resources were fetched into 'originals' and then tested by way of
-- the undefined global 'resources', so the originals mapping was never used;
-- now the resources are kept in a local.

function scripts.injectors.handler(head)
    local start = first_glyph(head) -- we already have glyphs here (subtype 1)
    if not start then
        return head, false
    end
    local last_a, normal_process, lastfont, originals = nil, nil, nil, nil
    local done, first, last, ok = false, nil, nil, false
    while start do
        local id = start.id
        if id == glyph_code then
            local a = start[a_scriptinjection]
            if a then
                if a ~= last_a then
                    -- another script: finish the pending range with the old injector
                    if first then
                        if ok then
                            process_range(head,first,last,normal_process,last_a)
                            ok, done = false, true
                        end
                        first, last = nil, nil
                    end
                    last_a = a
                    local handler = numbertohandler[a]
                    normal_process = handler.injector
                end
                if normal_process then
                    local f = start.font
                    if f ~= lastfont then
                        local resources = fontdata[f].resources
                        if resources then
                            originals = resources.originals
                        else
                            originals = nil -- can't happen
                        end
                        lastfont = f
                    end
                    local c = start.char
                    if originals then
                        c = originals[c] or c
                    end
                    local h = hash[c]
                    if h then
                        start[a_scriptstatus] = categorytonumber[h]
                        if not first then
                            first, last = start, start
                        else
                            last = start
                        end
                     -- if cjk == "chinese" or cjk == "korean" then -- we need to prevent too much ( ) processing
                            ok = true
                     -- end
                    elseif first then
                        if ok then
                            process_range(head,first,last,normal_process,last_a)
                            ok, done = false, true
                        end
                        first, last = nil, nil
                    end
                end
            elseif first then
                if ok then
                    process_range(head,first,last,normal_process,last_a)
                    ok, done = false, true
                end
                first, last = nil, nil
            end
        elseif id == glue_code then
            if ok then
                -- continue
            elseif first then
                -- no chinese or korean
                first, last = nil, nil
            end
        elseif first then
            if ok then
                -- some chinese or korean
                process_range(head,first,last,normal_process,last_a)
                first, last, ok, done = nil, nil, false, true
            else
                first, last = nil, nil
            end
        end
        start = start.next
    end
    if ok then
        process_range(head,first,last,normal_process,last_a)
        done = true
    end
    return head, done
end
-
-- A placeholder: the list is returned as it is and reported as not done.

function scripts.splitters.handler(head)
    local done = false
    return head, done
end
-
--- new plugin:
-
-local registercontext = fonts.specifiers.registercontext
-local mergecontext = fonts.specifiers.mergecontext
-
-local otfscripts = characters.otfscripts
-
-local report_scripts = logs.reporter("scripts","auto feature")
-local trace_scripts = false trackers.register("scripts.autofeature",function(v) trace_scripts = v end)
-
local autofontfeature   = scripts.autofontfeature or { }
scripts.autofontfeature = autofontfeature

-- Two caches that are indexed by font id; a slot (a table that is indexed by
-- script) is created the first time that a font is accessed. The first cache
-- is used for glyphs that already have a dynamic, the second one for glyphs
-- that have none.

local function newslot(t,k)
    local v = { }
    t[k] = v
    return v
end

local cache_yes = { }
local cache_nop = { }

setmetatableindex(cache_yes,newslot)
setmetatableindex(cache_nop,newslot)
-
--- beware: we need to tag a done (otherwise too many extra instances ... but how
--- often unpack? wait till we have a bitmap
---
--- we can consider merging this in handlers.characters(head) at some point as there
--- already check for the dynamic attribute so it saves a pass, however, then we also
--- need to check for a_scriptinjection there which nils the benefit
---
--- we can consider cheating: set all glyphs in a word as the first one but it's not
--- playing nice
-
-- For each glyph whose character has an entry in characters.otfscripts we set
-- attribute 0 (the dynamic) to a font context for that script. When the glyph
-- has no dynamic yet a context is registered for the font and script, else the
-- script is merged with the current dynamic. The outcome is cached per font
-- and script and a value of 0 is not applied. The head is returned.
--
-- The merge was called with the undefined 'name' instead of the script, which
-- is what the registercontext call in the other branch gets passed.
--
-- NOTE(review): the merged value is cached per font and script only, while it
-- is derived from the dynamic of the first glyph that asked for it; this
-- looks suspicious when several dynamics are mixed - to be confirmed.

function autofontfeature.handler(head)
    for n in traverse_id(glyph_code,head) do
     -- if n[a_scriptinjection] then
     --     -- already tagged by script feature, maybe some day adapt
     -- else
            local char   = n.char
            local script = otfscripts[char]
            if script then
                local dynamic = n[0] or 0
                local font    = n.font
                local attr
                if dynamic > 0 then
                    local slot = cache_yes[font]
                    attr = slot[script]
                    if not attr then
                        attr = mergecontext(dynamic,script,2)
                        slot[script] = attr
                        if trace_scripts then
                            report_scripts("script: %s, trigger %C, dynamic: %a, variant: %a",script,char,attr,"extended")
                        end
                    end
                else
                    local slot = cache_nop[font]
                    attr = slot[script]
                    if not attr then
                        attr = registercontext(font,script,2)
                        slot[script] = attr
                        if trace_scripts then
                            report_scripts("script: %s, trigger %C, dynamic: %s, variant: %a",script,char,attr,"normal")
                        end
                    end
                end
                if attr ~= 0 then
                    n[0] = attr
                    -- maybe set scriptinjection when associated
                end
            end
     -- end
    end
    return head
end
-
-- Switching the automatic script feature pass on and off; both functions
-- report the new state and are also available in the commands namespace.

local autofontaction = "scripts.autofontfeature.handler"

function autofontfeature.enable()
    report_scripts("globally enabled")
    enableaction("processors",autofontaction)
end

function autofontfeature.disable()
    report_scripts("globally disabled")
    disableaction("processors",autofontaction)
end

commands.enableautofontscript  = autofontfeature.enable
commands.disableautofontscript = autofontfeature.disable
+if not modules then modules = { } end modules ['scrp-ini'] = { + version = 1.001, + comment = "companion to scrp-ini.mkiv", + author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", + copyright = "PRAGMA ADE / ConTeXt Development Team", + license = "see context related readme files" +} + +-- We need to rewrite this a bit ... rather old code ... will be done when japanese +-- is finished. + +local attributes, nodes, node = attributes, nodes, node + +local trace_analyzing = false trackers.register("scripts.analyzing", function(v) trace_analyzing = v end) +local trace_injections = false trackers.register("scripts.injections", function(v) trace_injections = v end) + +local report_preprocessing = logs.reporter("scripts","preprocessing") + +local utfchar = utf.char + +local first_glyph = node.first_glyph or node.first_character +local traverse_id = node.traverse_id + +local texsetattribute = tex.setattribute + +local nodecodes = nodes.nodecodes +local unsetvalue = attributes.unsetvalue + +local glyph_code = nodecodes.glyph +local glue_code = nodecodes.glue + +local a_scriptinjection = attributes.private('scriptinjection') +local a_scriptsplitting = attributes.private('scriptsplitting') +local a_scriptstatus = attributes.private('scriptstatus') + +local fontdata = fonts.hashes.identifiers +local allocate = utilities.storage.allocate +local setnodecolor = nodes.tracers.colors.set +local setmetatableindex = table.setmetatableindex + +local enableaction = nodes.tasks.enableaction +local disableaction = nodes.tasks.disableaction + +scripts = scripts or { } +local scripts = scripts + +scripts.hash = scripts.hash or { } +local hash = scripts.hash + +local handlers = allocate() +scripts.handlers = handlers + +local injectors = allocate() +scripts.injectors = handlers + +local splitters = allocate() +scripts.splitters = splitters + +local hash = { -- we could put these presets in char-def.lua + -- + -- half width opening parenthesis + -- + [0x0028] = "half_width_open", + [0x005B] = 
"half_width_open", + [0x007B] = "half_width_open", + [0x2018] = "half_width_open", -- ‘ + [0x201C] = "half_width_open", -- “ + -- + -- full width opening parenthesis + -- + [0x3008] = "full_width_open", -- 〈 Left book quote + [0x300A] = "full_width_open", -- 《 Left double book quote + [0x300C] = "full_width_open", -- 「 left quote + [0x300E] = "full_width_open", -- 『 left double quote + [0x3010] = "full_width_open", -- 【 left double book quote + [0x3014] = "full_width_open", -- 〔 left book quote + [0x3016] = "full_width_open", --〖 left double book quote + [0x3018] = "full_width_open", -- left tortoise bracket + [0x301A] = "full_width_open", -- left square bracket + [0x301D] = "full_width_open", -- reverse double prime qm + [0xFF08] = "full_width_open", -- ( left parenthesis + [0xFF3B] = "full_width_open", -- [ left square brackets + [0xFF5B] = "full_width_open", -- { left curve bracket + -- + -- half width closing parenthesis + -- + [0x0029] = "half_width_close", + [0x005D] = "half_width_close", + [0x007D] = "half_width_close", + [0x2019] = "half_width_close", -- ’ right quote, right + [0x201D] = "half_width_close", -- ” right double quote + -- + -- full width closing parenthesis + -- + [0x3009] = "full_width_close", -- 〉 book quote + [0x300B] = "full_width_close", -- 》 double book quote + [0x300D] = "full_width_close", -- 」 right quote, right + [0x300F] = "full_width_close", -- 』 right double quote + [0x3011] = "full_width_close", -- 】 right double book quote + [0x3015] = "full_width_close", -- 〕 right book quote + [0x3017] = "full_width_close", -- 〗 right double book quote + [0x3019] = "full_width_close", -- right tortoise bracket + [0x301B] = "full_width_close", -- right square bracket + [0x301E] = "full_width_close", -- double prime qm + [0x301F] = "full_width_close", -- low double prime qm + [0xFF09] = "full_width_close", -- ) right parenthesis + [0xFF3D] = "full_width_close", -- ] right square brackets + [0xFF5D] = "full_width_close", -- } right curve brackets 
+ -- + [0xFF62] = "half_width_open", -- left corner bracket + [0xFF63] = "half_width_close", -- right corner bracket + -- + -- vertical opening vertical + -- + -- 0xFE35, 0xFE37, 0xFE39, 0xFE3B, 0xFE3D, 0xFE3F, 0xFE41, 0xFE43, 0xFE47, + -- + -- vertical closing + -- + -- 0xFE36, 0xFE38, 0xFE3A, 0xFE3C, 0xFE3E, 0xFE40, 0xFE42, 0xFE44, 0xFE48, + -- + -- half width opening punctuation + -- + -- <empty> + -- + -- full width opening punctuation + -- + -- 0x2236, -- ∶ + -- 0xFF0C, -- , + -- + -- half width closing punctuation_hw + -- + [0x0021] = "half_width_close", -- ! + [0x002C] = "half_width_close", -- , + [0x002E] = "half_width_close", -- . + [0x003A] = "half_width_close", -- : + [0x003B] = "half_width_close", -- ; + [0x003F] = "half_width_close", -- ? + [0xFF61] = "half_width_close", -- hw full stop + -- + -- full width closing punctuation + -- + [0x3001] = "full_width_close", -- 、 + [0x3002] = "full_width_close", -- 。 + [0xFF0C] = "full_width_close", -- , + [0xFF0E] = "full_width_close", -- + -- + -- depends on font + -- + [0xFF01] = "full_width_close", -- ! + [0xFF1F] = "full_width_close", -- ? 
+ -- + [0xFF1A] = "full_width_punct", -- : + [0xFF1B] = "full_width_punct", -- ; + -- + -- non starter + -- + [0x3005] = "non_starter", [0x3041] = "non_starter", [0x3043] = "non_starter", [0x3045] = "non_starter", [0x3047] = "non_starter", + [0x3049] = "non_starter", [0x3063] = "non_starter", [0x3083] = "non_starter", [0x3085] = "non_starter", [0x3087] = "non_starter", + [0x308E] = "non_starter", [0x3095] = "non_starter", [0x3096] = "non_starter", [0x309B] = "non_starter", [0x309C] = "non_starter", + [0x309D] = "non_starter", [0x309E] = "non_starter", [0x30A0] = "non_starter", [0x30A1] = "non_starter", [0x30A3] = "non_starter", + [0x30A5] = "non_starter", [0x30A7] = "non_starter", [0x30A9] = "non_starter", [0x30C3] = "non_starter", [0x30E3] = "non_starter", + [0x30E5] = "non_starter", [0x30E7] = "non_starter", [0x30EE] = "non_starter", [0x30F5] = "non_starter", [0x30F6] = "non_starter", + [0x30FC] = "non_starter", [0x30FD] = "non_starter", [0x30FE] = "non_starter", [0x31F0] = "non_starter", [0x31F1] = "non_starter", + [0x30F2] = "non_starter", [0x30F3] = "non_starter", [0x30F4] = "non_starter", [0x31F5] = "non_starter", [0x31F6] = "non_starter", + [0x30F7] = "non_starter", [0x30F8] = "non_starter", [0x30F9] = "non_starter", [0x31FA] = "non_starter", [0x31FB] = "non_starter", + [0x30FC] = "non_starter", [0x30FD] = "non_starter", [0x30FE] = "non_starter", [0x31FF] = "non_starter", + -- + -- hyphenation + -- + [0x2026] = "hyphen", -- … ellipsis + [0x2014] = "hyphen", -- — hyphen + -- + [0x1361] = "ethiopic_word", + [0x1362] = "ethiopic_sentence", + -- +} + +local function provide(t,k) + local v + if not tonumber(k) then v = false + elseif (k >= 0x03040 and k <= 0x030FF) + or (k >= 0x031F0 and k <= 0x031FF) + or (k >= 0x032D0 and k <= 0x032FE) + or (k >= 0x0FF00 and k <= 0x0FFEF) then v = "katakana" + elseif (k >= 0x03400 and k <= 0x04DFF) + or (k >= 0x04E00 and k <= 0x09FFF) + or (k >= 0x0F900 and k <= 0x0FAFF) + or (k >= 0x20000 and k <= 0x2A6DF) + or (k >= 0x2F800 
and k <= 0x2FA1F) then v = "chinese" + elseif (k >= 0x0AC00 and k <= 0x0D7A3) then v = "korean" + elseif (k >= 0x01100 and k <= 0x0115F) then v = "jamo_initial" + elseif (k >= 0x01160 and k <= 0x011A7) then v = "jamo_medial" + elseif (k >= 0x011A8 and k <= 0x011FF) then v = "jamo_final" + elseif (k >= 0x01200 and k <= 0x0139F) then v = "ethiopic_syllable" + else v = false + end + t[k] = v + return v +end + +setmetatableindex(hash,provide) + +scripts.hash = hash + +local numbertodataset = allocate() +local numbertohandler = allocate() + +--~ storage.register("scripts/hash", hash, "scripts.hash") + +scripts.numbertodataset = numbertodataset +scripts.numbertohandler = numbertohandler + +local defaults = { + inter_char_shrink_factor = 0, + inter_char_shrink_factor = 0, + inter_char_stretch_factor = 0, + inter_char_half_shrink_factor = 0, + inter_char_half_stretch_factor = 0, + inter_char_quarter_shrink_factor = 0, + inter_char_quarter_stretch_factor = 0, + inter_char_hangul_penalty = 0, + + inter_word_stretch_factor = 0, +} + +scripts.defaults = defaults -- so we can add more + +function scripts.installmethod(handler) + local name = handler.name + handlers[name] = handler + local attributes = { } + local datasets = handler.datasets + if not datasets or not datasets.default then + report_preprocessing("missing (default) dataset in script %a",name) + datasets.default = { } -- slower but an error anyway + end + for k, v in next, datasets do + setmetatableindex(v,defaults) + end + setmetatable(attributes, { + __index = function(t,k) + local v = datasets[k] or datasets.default + local a = unsetvalue + if v then + v.name = name -- for tracing + a = #numbertodataset + 1 + numbertodataset[a] = v + numbertohandler[a] = handler + end + t[k] = a + return a + end + } ) + handler.attributes = attributes +end + +function scripts.installdataset(specification) -- global overload + local method = specification.method + local name = specification.name + local dataset = 
specification.dataset + if method and name and dataset then + local parent = specification.parent or "" + local handler = handlers[method] + if handler then + local datasets = handler.datasets + if datasets then + local defaultset = datasets.default + if defaultset then + if parent ~= "" then + local p = datasets[parent] + if p then + defaultset = p + else + report_preprocessing("dataset, unknown parent %a for method %a",parent,method) + end + end + setmetatable(dataset,defaultset) + local existing = datasets[name] + if existing then + for k, v in next, existing do + existing[k] = dataset + end + else + datasets[name] = dataset + end + else + report_preprocessing("dataset, no default for method %a",method) + end + else + report_preprocessing("dataset, no datasets for method %a",method) + end + else + report_preprocessing("dataset, no method %a",method) + end + else + report_preprocessing("dataset, invalid specification") -- maybe report table + end +end + +local injectorenabled = false +local splitterenabled = false + +function scripts.set(name,method,preset) + local handler = handlers[method] + if handler then + if handler.injector then + if not injectorenabled then + enableaction("processors","scripts.injectors.handler") + injectorenabled = true + end + texsetattribute(a_scriptinjection,handler.attributes[preset] or unsetvalue) + end + if handler.splitter then + if not splitterenabled then + enableaction("processors","scripts.splitters.handler") + splitterenabled = true + end + texsetattribute(a_scriptsplitting,handler.attributes[preset] or unsetvalue) + end + if handler.initializer then + handler.initializer(handler) + handler.initializer = nil + end + else + texsetattribute(a_scriptinjection,unsetvalue) + texsetattribute(a_scriptsplitting,unsetvalue) + end +end + +function scripts.reset() + texsetattribute(a_scriptinjection,unsetvalue) + texsetattribute(a_scriptsplitting,unsetvalue) +end + +-- the following tables will become a proper installer (move to 
cjk/eth) +-- +-- 0=gray 1=red 2=green 3=blue 4=yellow 5=magenta 6=cyan 7=x-yellow 8=x-magenta 9=x-cyan + +local scriptcolors = allocate { -- todo: just named colors + korean = "trace:0", + chinese = "trace:0", + katakana = "trace:0", + hiragana = "trace:0", + full_width_open = "trace:1", + full_width_close = "trace:2", + half_width_open = "trace:3", + half_width_close = "trace:4", + full_width_punct = "trace:5", + hyphen = "trace:5", + non_starter = "trace:6", + jamo_initial = "trace:7", + jamo_medial = "trace:8", + jamo_final = "trace:9", + ethiopic_syllable = "trace:1", + ethiopic_word = "trace:2", + ethiopic_sentence = "trace:3", +} + +scripts.colors = scriptcolors + +local numbertocategory = allocate { -- rather bound to cjk ... will be generalized + "korean", + "chinese", + "katakana", + "hiragana", + "full_width_open", + "full_width_close", + "half_width_open", + "half_width_close", + "full_width_punct", + "hyphen", + "non_starter", + "jamo_initial", + "jamo_medial", + "jamo_final", + "ethiopic_syllable", + "ethiopic_word", + "ethiopic_sentence", +} + +local categorytonumber = allocate(table.swapped(numbertocategory)) -- could be one table + +scripts.categorytonumber = categorytonumber +scripts.numbertocategory = numbertocategory + +local function colorize(start,stop) + for n in traverse_id(glyph_code,start) do + local kind = numbertocategory[n[a_scriptstatus]] + if kind then + local ac = scriptcolors[kind] + if ac then + setnodecolor(n,ac) + end + end + if n == stop then + break + end + end +end + +local function traced_process(head,first,last,process,a) + if start ~= last then + local f, l = first, last + local name = numbertodataset[a] + name = name and name.name or "?" 
-- (the next five lines close a tracing helper whose header lies before this point;
-- presumably traced_process -- they are kept as they were)
        report_preprocessing("before %s: %s",name,nodes.tosequence(f,l))
        process(head,first,last)
        report_preprocessing("after %s: %s", name,nodes.tosequence(f,l))
    end
end

-- eventually we might end up with more extensive parsing
-- todo: pass t[start..stop] == original
--
-- one of the time consuming functions:

-- we can have a fonts.hashes.originals

-- Hands one collected run of glyphs (first .. last) to the injector of the script
-- that is currently active. When the analyzing tracker is on the run is colorized
-- first; when the injections tracker is on the call goes through traced_process so
-- that the node sequence is reported before and after.

local function flush_run(head,first,last,process,a)
    if trace_analyzing then
        colorize(first,last)
    end
    if trace_injections then
        traced_process(head,first,last,process,a)
    else
        process(head,first,last)
    end
end

-- Walks the node list starting at the first glyph and collects runs of glyphs that
--
--   * carry the scriptinjection attribute, and
--   * have a character with an entry in scripts.hash.
--
-- Every glyph in such a run gets its scriptstatus attribute set to the number of its
-- category. A run is passed to the injector of the handler that belongs to the
-- attribute value as soon as it ends: at a glyph without the attribute, at a glyph
-- with a different attribute value, at a glyph whose character is not in the hash,
-- at any node that is neither glyph nor glue, or at the end of the list. Glue does
-- not end a run.
--
-- Returns head and a boolean that is true when at least one run was processed.

function scripts.injectors.handler(head)
    local start = first_glyph(head) -- we already have glyphs here (subtype 1)
    if not start then
        return head, false
    end
    local last_a, normal_process = nil, nil
    local first, last = nil, nil
    local ok, done = false, false
    while start do
        local id = start.id
        if id == glyph_code then
            local a = start[a_scriptinjection]
            if a then
                if a ~= last_a then
                    -- another script takes over: finish the pending run with the
                    -- injector of the previous script before switching
                    if first then
                        if ok then
                            flush_run(head,first,last,normal_process,last_a)
                            ok, done = false, true
                        end
                        first, last = nil, nil
                    end
                    last_a = a
                    local handler = numbertohandler[a]
                    normal_process = handler.injector
                end
                if normal_process then
                    -- The character is looked up as it is. The previous code fetched
                    -- fontdata[font].resources and then tested a name ("resources")
                    -- that is not declared anywhere in the visible code before taking
                    -- its originals field, so in practice that branch was never taken
                    -- and the intended remapping of the character had no effect. The
                    -- dead lookup is dropped instead of silently switched on, because
                    -- switching it on changes which characters get classified.
                    -- NOTE(review): before reinstating a remap, confirm how
                    -- resources.originals is keyed (character code or glyph index).
                    local h = hash[start.char]
                    if h then
                        start[a_scriptstatus] = categorytonumber[h]
                        if not first then
                            first = start
                        end
                        last = start
                     -- if cjk == "chinese" or cjk == "korean" then -- we need to prevent too much ( ) processing
                        ok = true
                     -- end
                    elseif first then
                        -- a glyph that the script does not know ends the run
                        if ok then
                            flush_run(head,first,last,normal_process,last_a)
                            ok, done = false, true
                        end
                        first, last = nil, nil
                    end
                end
            elseif first then
                -- a glyph without the attribute ends the run
                if ok then
                    flush_run(head,first,last,normal_process,last_a)
                    ok, done = false, true
                end
                first, last = nil, nil
            end
        elseif id == glue_code then
            if ok then
                -- continue
            elseif first then
                -- no chinese or korean
                first, last = nil, nil
            end
        elseif first then
            if ok then
                -- some chinese or korean
                flush_run(head,first,last,normal_process,last_a)
                ok, done = false, true
            end
            first, last = nil, nil
        end
        start = start.next
    end
    if ok then
        -- the list ended while a run was still pending
        flush_run(head,first,last,normal_process,last_a)
        done = true
    end
    return head, done
end

-- Placeholder: splitting is not implemented, the list is returned untouched.

function scripts.splitters.handler(head)
    return head, false
end

-- new plugin:

local registercontext = fonts.specifiers.registercontext
local mergecontext    = fonts.specifiers.mergecontext

local otfscripts      = characters.otfscripts

local report_scripts  = logs.reporter("scripts","auto feature")
local trace_scripts   = false  trackers.register("scripts.autofeature",function(v) trace_scripts = v end)

local autofontfeature   = scripts.autofontfeature or { }
scripts.autofontfeature = autofontfeature

-- Both caches are indexed as cache[font][script]; the per-font subtable is created
-- on first access.

local cache_yes = { }
local cache_nop = { }

setmetatableindex(cache_yes,function(t,k) local v = { } t[k] = v return v end)
setmetatableindex(cache_nop,function(t,k) local v = { } t[k] = v return v end)

-- beware: we need to tag a done (otherwise too many extra instances ... but how
-- often unpack?
-- wait till we have a bitmap
--
-- we can consider merging this in handlers.characters(head) at some point as there
-- already check for the dynamic attribute so it saves a pass, however, then we also
-- need to check for a_scriptinjection there which nils the benefit
--
-- we can consider cheating: set all glyphs in a word as the first one but it's not
-- playing nice

-- Visits every glyph in the list and, when characters.otfscripts knows a script for
-- its character, stores a script specific value in attribute 0 of that glyph:
--
--   * when attribute 0 is already set (> 0) the value comes from mergecontext,
--     called with the current attribute value and the script;
--   * otherwise the value comes from registercontext, called with the font and the
--     script.
--
-- The outcome is cached per font and script. A value of 0 leaves the glyph alone.
-- Only head is returned.
--
-- NOTE(review): the merged value is cached under font and script only, while it is
-- computed from the current value of attribute 0; two different attribute values
-- for the same font and script therefore share the first merged result. Confirm
-- that this is intended.

function autofontfeature.handler(head)
    for n in traverse_id(glyph_code,head) do
     -- if n[a_scriptinjection] then
     --     -- already tagged by script feature, maybe some day adapt
     -- else
            local char   = n.char
            local script = otfscripts[char]
            if script then
                local dynamic = n[0] or 0
                local font    = n.font
                local attr
                if dynamic > 0 then
                    local slot = cache_yes[font]
                    attr = slot[script]
                    if not attr then
                        -- this passed an undeclared "name" before, which made the
                        -- merge independent of the script that was just found
                        attr = mergecontext(dynamic,script,2)
                        slot[script] = attr
                        if trace_scripts then
                            report_scripts("script: %s, trigger %C, dynamic: %a, variant: %a",script,char,attr,"extended")
                        end
                    end
                else
                    local slot = cache_nop[font]
                    attr = slot[script]
                    if not attr then
                        attr = registercontext(font,script,2)
                        slot[script] = attr
                        if trace_scripts then
                            report_scripts("script: %s, trigger %C, dynamic: %s, variant: %a",script,char,attr,"normal")
                        end
                    end
                end
                if attr ~= 0 then
                    n[0] = attr
                    -- maybe set scriptinjection when associated
                end
            end
     -- end
    end
    return head
end

-- Switches the handler on as a "processors" task and reports that.

function autofontfeature.enable()
    report_scripts("globally enabled")
    enableaction("processors","scripts.autofontfeature.handler")
end

-- Switches the handler off again and reports that.

function autofontfeature.disable()
    report_scripts("globally disabled")
    disableaction("processors","scripts.autofontfeature.handler")
end

commands.enableautofontscript  = autofontfeature.enable
commands.disableautofontscript = autofontfeature.disable