summaryrefslogtreecommitdiff
path: root/tex/context/base/scrp-ini.lua
diff options
context:
space:
mode:
authorHans Hagen <pragma@wxs.nl>2013-06-04 19:42:00 +0200
committerHans Hagen <pragma@wxs.nl>2013-06-04 19:42:00 +0200
commitfb6533c362f27e9811417482f57ea8cbdd31abf0 (patch)
tree1f97acb4bbbb3777738581c89e7211b0664f678a /tex/context/base/scrp-ini.lua
parentf7eaca8dd3301d6526d1610e523d6538404dc95b (diff)
downloadcontext-fb6533c362f27e9811417482f57ea8cbdd31abf0.tar.gz
beta 2013.06.04 19:42
Diffstat (limited to 'tex/context/base/scrp-ini.lua')
-rw-r--r--tex/context/base/scrp-ini.lua306
1 files changed, 300 insertions, 6 deletions
diff --git a/tex/context/base/scrp-ini.lua b/tex/context/base/scrp-ini.lua
index 18f86475f..56422e622 100644
--- a/tex/context/base/scrp-ini.lua
+++ b/tex/context/base/scrp-ini.lua
@@ -11,12 +11,16 @@ if not modules then modules = { } end modules ['scrp-ini'] = {
local attributes, nodes, node = attributes, nodes, node
-local trace_analyzing = false trackers.register("scripts.analyzing", function(v) trace_analyzing = v end)
-local trace_injections = false trackers.register("scripts.injections", function(v) trace_injections = v end)
+local trace_analyzing = false trackers.register("scripts.analyzing", function(v) trace_analyzing = v end)
+local trace_injections = false trackers.register("scripts.injections", function(v) trace_injections = v end)
+local trace_splitting = false trackers.register("scripts.splitting", function(v) trace_splitting = v end)
+local trace_splitdetail = false trackers.register("scripts.splitring.detail", function(v) trace_splitdetail = v end)
local report_preprocessing = logs.reporter("scripts","preprocessing")
+local report_splitting = logs.reporter("scripts","splitting")
-local utfchar = utf.char
+local utfbyte, utfsplit = utf.byte, utf.split
+local gmatch = string.gmatch
local first_glyph = node.first_glyph or node.first_character
local traverse_id = node.traverse_id
@@ -29,6 +33,9 @@ local unsetvalue = attributes.unsetvalue
local glyph_code = nodecodes.glyph
local glue_code = nodecodes.glue
+local emwidths = fonts.hashes.emwidths
+local exheights = fonts.hashes.exheights
+
local a_scriptinjection = attributes.private('scriptinjection')
local a_scriptsplitting = attributes.private('scriptsplitting')
local a_scriptstatus = attributes.private('scriptstatus')
@@ -41,6 +48,14 @@ local setmetatableindex = table.setmetatableindex
local enableaction = nodes.tasks.enableaction
local disableaction = nodes.tasks.disableaction
+local insert_node_after = node.insert_after
+
+local nodepool = nodes.pool
+local new_glue = nodepool.glue
+local new_rule = nodepool.rule
+local new_penalty = nodepool.penalty
+----- new_gluespec = nodepool.gluespec
+
scripts = scripts or { }
local scripts = scripts
@@ -198,7 +213,7 @@ local function provide(t,k)
return v
end
-setmetatableindex(hash,provide)
+setmetatableindex(hash,provide) -- should come from char-def
scripts.hash = hash
@@ -542,10 +557,289 @@ function scripts.injectors.handler(head)
end
end
-function scripts.splitters.handler(head)
- return head, false
+-- kind of experimental .. might move to it's own module
+
+-- function scripts.splitters.handler(head)
+-- return head, false
+-- end
+
+local function addwords(tree,data)
+ if not tree then
+ tree = { }
+ end
+ for word in gmatch(data,"%S+") do
+ local root = tree
+ local list = utfsplit(word,true)
+ for i=1,#list do
+ local l = utfbyte(list[i])
+ local r = root[l]
+ if not r then
+ r = { }
+ root[l] = r
+ end
+ if i == #list then
+ r.final = word -- true -- could be something else, like word in case of tracing
+ else
+ root = r
+ end
+ end
+ end
+ return tree
+end
+
+local loaded = { }
+
+function splitters.load(handler,files)
+ local files = handler.files
+ local tree = handler.tree or { }
+ handler.tree = tree
+ if not files then
+ return
+ elseif type(files) == "string" then
+ files = { files }
+ handler.files = files
+ end
+ if trace_splitting then
+ report_splitting("loading splitter data for language/script %a",handler.name)
+ end
+ loaded[handler.name or "unknown"] = (loaded[handler.name or "unknown"] or 0) + 1
+ statistics.starttiming(loaded)
+ for i=1,#files do
+ local filename = files[i]
+ local fullname = resolvers.findfile(filename)
+ if fullname == "" then
+ fullname = resolvers.findfile(filename .. ".gz")
+ end
+ if fullname ~= "" then
+ if trace_splitting then
+ report_splitting("loading file %a",fullname)
+ end
+ local suffix, gzipped = gzip.suffix(fullname)
+ if suffix == "lua" then
+ local specification = table.load(fullname,gzipped and gzip.load)
+ if specification then
+ local lists = specification.lists
+ if lists then
+ for i=1,#lists do
+ local entry = lists[i]
+ local data = entry.data
+ if data then
+ if entry.compression == "zlib" then
+ data = zlib.decompress(data)
+ if entry.length and entry.length ~= #data then
+ report_splitting("compression error in file %a",fullname)
+ end
+ end
+ if data then
+ addwords(tree,data)
+ end
+ end
+ end
+ end
+ end
+ else
+ local data = gzipped and io.loadgzip(fullname) or io.loaddata(fullname)
+ if data then
+ addwords(tree,data)
+ end
+ end
+ else
+ report_splitting("unknown file %a",filename)
+ end
+ end
+ statistics.stoptiming(loaded)
+ return tree
+end
+
+statistics.register("loaded split lists", function()
+ if next(loaded) then
+ return string.format("%s, load time: %s",table.sequenced(loaded),statistics.elapsedtime(loaded))
+ end
+end)
+
+-- function splitters.addlist(name,filename)
+-- local handler = scripts.handlers[name]
+-- if handler and filename then
+-- local files = handler.files
+-- if not files then
+-- files = { }
+-- elseif type(files) == "string" then
+-- files = { files }
+-- end
+-- handler.files = files
+-- if type(filename) == "string" then
+-- filename = utilities.parsers.settings_to_array(filename)
+-- end
+-- if type(filename) == "table" then
+-- for i=1,#filename do
+-- files[#files+1] = filenames[i]
+-- end
+-- end
+-- end
+-- end
+--
+-- commands.setscriptsplitterlist = splitters.addlist
+
+local categories = characters.categories or { }
+
+local function hit(root,head)
+ local current = head.next
+ local lastrun = false
+ local lastfinal = false
+ while current and current.id == glyph_code do
+ local char = current.char
+ local newroot = root[char]
+ if newroot then
+ local final = newroot.final
+ if final then
+ lastrun = current
+ lastfinal = final
+ end
+ root = newroot
+ elseif categories[char] == "mn" then
+ -- continue
+ else
+ return lastrun, lastfinal
+ end
+ current = current.next
+ end
+ if lastrun then
+ return lastrun, lastfinal
+ end
end
+local tree, attr, proc
+
+function splitters.handler(head)
+ local current = head
+ local done = false
+ while current do
+ if current.id == glyph_code then
+ local a = current[a_scriptsplitting]
+ if a then
+ if a ~= attr then
+ local handler = numbertohandler[a]
+ tree = handler.tree or { }
+ attr = a
+ proc = handler.splitter
+ end
+ if proc then
+ local root = tree[current.char]
+ if root then
+ -- we don't check for attributes in the hitter (yet)
+ local last, final = hit(root,current)
+ if last then
+ local next = last.next
+ if next and next.id == glyph_code then
+ local nextchar = next.char
+ if tree[nextchar] then
+ if trace_splitdetail then
+ if type(final) == "string" then
+ report_splitting("advance %s processing between <%s> and <%c>","with",final,nextchar)
+ else
+ report_splitting("advance %s processing between <%c> and <%c>","with",char,nextchar)
+ end
+ end
+ head, current = proc(handler,head,current,last,1)
+ done = true
+ else
+ if trace_splitdetail then
+ -- could be punctuation
+ if type(final) == "string" then
+ report_splitting("advance %s processing between <%s> and <%c>","without",final,nextchar)
+ else
+ report_splitting("advance %s processing between <%c> and <%c>","without",char,nextchar)
+ end
+ end
+ head, current = proc(handler,head,current,last,2)
+ done = true
+ end
+ end
+ end
+ end
+ end
+ end
+ end
+ current = current.next
+ end
+ return head, done
+end
+
+local function marker(head,current,font,color) -- could become: nodes.tracers.marker
+ local ex = exheights[font]
+ local em = emwidths [font]
+ head, current = insert_node_after(head,current,new_penalty(10000))
+ head, current = insert_node_after(head,current,new_glue(-0.05*em))
+ head, current = insert_node_after(head,current,new_rule(0.05*em,1.5*ex,0.5*ex))
+ setnodecolor(current,color)
+ return head, current
+end
+
+-- local function process(handler,head,first,last)
+-- dataset = numbertodataset[first[a_scriptsplitting]]
+-- stretch = emwidths[first.font]*dataset.inter_word_stretch_factor
+-- return insert_node_after(head,last,new_glue(0,stretch))
+-- end
+--
+-- local cache = { } table.setmetatableindex(cache,function(t,k)
+-- local v = new_gluespec(0,k)
+-- nodepool.register(v)
+-- t[k] = v
+-- return v
+-- end)
+-- return insert_node_after(head,last,new_glue(cache[last_s]))
+
+local last_a, last_f, last_s, last_q
+
+function splitters.insertafter(handler,head,first,last,detail)
+ local a = first[a_scriptsplitting]
+ local f = first.font
+ if a ~= last_a or f ~= last_f then
+ last_s = emwidths[f] * numbertodataset[a].inter_word_stretch_factor
+ last_a = a
+ last_f = f
+ end
+ if trace_splitting then
+ head, last = marker(head,last,f,detail == 2 and "trace:r" or "trace:g")
+ end
+ if ignore then
+ return head, last
+ else
+ return insert_node_after(head,last,new_glue(0,last_s))
+ end
+end
+
+-- word-xx.lua:
+--
+-- return {
+-- comment = "test",
+-- copyright = "not relevant",
+-- language = "en",
+-- timestamp = "2013-05-20 14:15:21",
+-- version = "1.00",
+-- lists = {
+-- {
+-- -- data = "we thrive information in thick worlds because of our marvelous and everyday capacity to select edit single out structure highlight group pair merge harmonize synthesize focus organize condense reduce boil down choose categorize catalog classify list abstract scan look into idealize isolate discriminate distinguish screen pigeonhole pick over sort integrate blend inspect filter lump skip smooth chunk average approximate cluster aggregate outline summarize itemize review dip into flip through browse glance into leaf through skim refine enumerate glean synopsize winnow the wheat from the chaff and separate the sheep from the goats",
+-- data = "abstract aggregate and approximate average because blend boil browse capacity catalog categorize chaff choose chunk classify cluster condense dip discriminate distinguish down edit enumerate everyday filter flip focus from glance glean goats group harmonize highlight idealize in information inspect integrate into isolate itemize leaf list look lump marvelous merge of organize our out outline over pair pick pigeonhole reduce refine review scan screen select separate sheep single skim skip smooth sort structure summarize synopsize synthesize the thick thrive through to we wheat winnow worlds",
+-- },
+-- },
+-- }
+
+scripts.installmethod {
+ name = "test",
+ splitter = splitters.insertafter,
+ initializer = splitters.load,
+ files = {
+ -- "scrp-imp-word-test.lua",
+ "word-xx.lua",
+ },
+ datasets = {
+ default = {
+ inter_word_stretch_factor = 0.25, -- of quad
+ },
+ },
+}
+
-- new plugin:
local registercontext = fonts.specifiers.registercontext