summaryrefslogtreecommitdiff
path: root/tex/context/base/mkiv/lang-wrd.lua
diff options
context:
space:
mode:
authorContext Git Mirror Bot <phg42.2a@gmail.com>2016-01-12 17:15:07 +0100
committerContext Git Mirror Bot <phg42.2a@gmail.com>2016-01-12 17:15:07 +0100
commit8d8d528d2ad52599f11250cfc567fea4f37f2a8b (patch)
tree94286bc131ef7d994f9432febaf03fe23d10eef8 /tex/context/base/mkiv/lang-wrd.lua
parentf5aed2e51223c36c84c5f25a6cad238b2af59087 (diff)
downloadcontext-8d8d528d2ad52599f11250cfc567fea4f37f2a8b.tar.gz
2016-01-12 16:26:00
Diffstat (limited to 'tex/context/base/mkiv/lang-wrd.lua')
-rw-r--r--tex/context/base/mkiv/lang-wrd.lua387
1 files changed, 387 insertions, 0 deletions
diff --git a/tex/context/base/mkiv/lang-wrd.lua b/tex/context/base/mkiv/lang-wrd.lua
new file mode 100644
index 000000000..b564a02ae
--- /dev/null
+++ b/tex/context/base/mkiv/lang-wrd.lua
@@ -0,0 +1,387 @@
+if not modules then modules = { } end modules ['lang-wrd'] = {
+ version = 1.001,
+ comment = "companion to lang-ini.mkiv",
+ author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
+ copyright = "PRAGMA ADE / ConTeXt Development Team",
+ license = "see context related readme files"
+}
+
+local lower = string.lower
+local utfchar = utf.char
+local concat = table.concat
+local lpegmatch = lpeg.match
+local P, S, Cs = lpeg.P, lpeg.S, lpeg.Cs
+
+local report_words = logs.reporter("languages","words")
+
+local nodes = nodes
+local languages = languages
+
+local implement = interfaces.implement
+
+-- Shared module table; an already existing one is reused.
+local words = languages.words or { }
+languages.words = words
+
+words.data      = words.data or { } -- word lists, keyed by language tag
+-- NOTE(review): 'enables' is never read in this file (checking is controlled
+-- by a private 'enabled' flag); kept unchanged for compatibility.
+words.enables   = false
+words.threshold = 4 -- words with a smaller byte length (#) are skipped
+
+local numbers = languages.numbers
+local registered = languages.registered
+
+local nuts = nodes.nuts
+local tonut = nuts.tonut
+
+local getfield = nuts.getfield
+local getnext = nuts.getnext
+local getid = nuts.getid
+local getsubtype = nuts.getsubtype
+local getchar = nuts.getchar
+local setattr = nuts.setattr
+
+local traverse_nodes = nuts.traverse
+
+local wordsdata = words.data
+local chardata = characters.data
+local tasks = nodes.tasks
+
+local unsetvalue = attributes.unsetvalue
+
+local nodecodes = nodes.nodecodes
+local kerncodes = nodes.kerncodes
+
+local glyph_code = nodecodes.glyph
+local disc_code = nodecodes.disc
+local kern_code = nodecodes.kern
+
+local kerning_code = kerncodes.kerning
+local lowerchar = characters.lower
+
+local a_color = attributes.private('color')
+local colist = attributes.list[a_color]
+
+local is_letter = characters.is_letter -- maybe is_character as variant
+
+-- Grammar for word list files: entries are separated by whitespace; the
+-- markup characters "-" and "=" as well as runs of brace delimited groups
+-- are removed from an entry before it is stored.
+local spacing = S(" \n\r\t")
+local markup  = S("-=")
+local lbrace, rbrace = P("{"), P("}")
+local group   = lbrace * (1-rbrace)^0 * rbrace
+local disc    = group^1 -- or just 3 times, time this
+local dropped = markup/"" + disc/""
+local word    = Cs((dropped + (1-spacing))^1)
+
+local loaded = { } -- parsed lists per resolved filename (we share lists)
+
+-- Loads the word file 'filename' and registers its words under 'tag'.
+--
+-- The file is located with resolvers.findfile; a missing file is only
+-- reported. A file that has been parsed before is not read again: the list
+-- cached for its resolved name is assigned to wordsdata[tag] instead.
+function words.load(tag,filename)
+    local fullname = resolvers.findfile(filename,'other text file') or ""
+    if fullname == "" then
+        report_words("missing word file %a",filename)
+        return
+    end
+    report_words("loading word file %a",fullname)
+    statistics.starttiming(languages)
+    local list = loaded[fullname]
+    if not list then
+        -- extend what is already known for this tag
+        list = wordsdata[tag] or { }
+        local function register(s)
+            list[s] = true
+        end
+        local parser = (spacing + word/register)^0
+        lpegmatch(parser,io.loaddata(fullname) or "")
+        loaded[fullname] = list
+    end
+    wordsdata[tag] = list
+    statistics.stoptiming(languages)
+end
+
+-- Looks up 'str' in the word list of the language with number 'id'.
+--
+-- Returns 1 for an exact match, 2 when only the lowercased string matches
+-- and nothing when the language has no list or the word is unknown.
+function words.found(id, str)
+    local tag  = languages.numbers[id]
+    local data = tag and wordsdata[tag]
+    if not data then
+        return
+    end
+    if data[str] then
+        return 1
+    end
+    if data[lower(str)] then
+        return 2
+    end
+end
+
+-- The following code is an adaptation of experimental code for hyphenating and
+-- spell checking.
+
+-- there is an n=1 problem somewhere in nested boxes
+
+-- Walks the node list that starts at 'head' and splits it into words.
+--
+-- Consecutive letter glyphs of the same language are collected; for each
+-- completed word 'whenfound(language,word)' is called. When that call returns
+-- a function, it is applied to every node that was collected for the word.
+--
+-- Returns 'head' and a boolean that tells if any word got marked.
+local function mark_words(head,whenfound) -- can be optimized and shared
+    local current  = tonut(head)
+    local language = nil
+    local done     = false -- was accidentally left nil by a mismatched initializer
+    local str, s, nds, n = { }, 0, { }, 0 -- n could also be a table, saves calls
+    -- flushes the word collected so far and resets the collectors
+    local function action()
+        if s > 0 then
+            local word = concat(str,"",1,s)
+            local mark = whenfound(language,word)
+            if mark then
+                done = true
+                for i=1,n do
+                    mark(nds[i])
+                end
+            end
+        end
+        n, s = 0, 0
+    end
+    while current do
+        local id = getid(current)
+        if id == glyph_code then
+            local a = getfield(current,"lang")
+            if a then
+                if a ~= language then
+                    -- a language switch ends the current word
+                    if s > 0 then
+                        action()
+                    end
+                    language = a
+                end
+            elseif s > 0 then
+                action()
+                language = a
+            end
+            local components = getfield(current,"components")
+            if components then
+                -- the glyph carries components: mark the glyph itself but
+                -- take the characters from the components
+                n = n + 1
+                nds[n] = current
+                for g in traverse_nodes(components) do
+                    s = s + 1
+                    str[s] = utfchar(getchar(g))
+                end
+            else
+                local code = getchar(current)
+                local data = chardata[code]
+                -- characters without an entry in chardata are treated as
+                -- non letters instead of raising an error
+                if data and is_letter[data.category] then
+                    n = n + 1
+                    nds[n] = current
+                    s = s + 1
+                    str[s] = utfchar(code)
+                elseif s > 0 then
+                    action()
+                end
+            end
+        elseif id == disc_code then -- take the replace
+            -- inside a word the disc node is marked too, it adds no characters
+            if n > 0 then
+                n = n + 1
+                nds[n] = current
+            end
+        elseif id == kern_code and getsubtype(current) == kerning_code and s > 0 then
+            -- ok, a font kern does not end a word
+        elseif s > 0 then
+            action()
+        end
+        current = getnext(current)
+    end
+    if s > 0 then
+        action()
+    end
+    return head, done
+end
+
+-- Dispatch tables: methods[n] processes a node list, the optional enablers[n]
+-- is called by words.enable with the settings.
+local methods, enablers = { }, { }
+
+words.methods  = methods
+words.enablers = enablers
+
+local wordmethod, enabled = 1, false -- active method and on/off switch
+
+-- Node list processor: applies the active method to 'head' when checking is
+-- enabled.
+--
+-- Returns what the method returns; when checking is disabled, or when no
+-- method is registered under the active number, the list is passed through
+-- as 'head, false'.
+function words.check(head)
+    if enabled then
+        local method = methods[wordmethod]
+        if method then
+            return method(head)
+        end
+    end
+    return head, false
+end
+
+-- Switches checking on.
+--
+-- settings.method selects the method (anything tonumber accepts); without a
+-- usable value the current method is kept. The enabler of the chosen method,
+-- if there is one, gets the settings as well.
+function words.enable(settings)
+    local requested = settings.method
+    if requested then
+        wordmethod = tonumber(requested) or wordmethod or 1
+    else
+        wordmethod = wordmethod or 1
+    end
+    local enabler = enablers[wordmethod]
+    if enabler then
+        enabler(settings)
+    end
+    tasks.enableaction("processors","languages.words.check")
+    enabled = true
+end
+
+-- Switches checking off. Only the private flag is cleared, so words.check
+-- passes lists through untouched from now on.
+words.disable = function()
+    enabled = false
+end
+
+-- colors
+
+-- Maps a key to a function that colors a node, or to false when no color is
+-- defined for the key. Results are stored, so colors are frozen once used.
+--
+-- A string key is looked up in the color list as is. Any other key is taken
+-- as a language number: negative numbers and non numbers (like an unset
+-- language) map to "word:unset", the rest to "word:<tag>" with
+-- "word:unknown" as fallback.
+local cache = { } -- can also be done with method 1 -- frozen colors once used
+
+table.setmetatableindex(cache, function(t,k) -- k == language, numbers[k] == tag
+    local c
+    if type(k) == "string" then
+        c = colist[k]
+    elseif type(k) ~= "number" or k < 0 then
+        -- also catches a nil language, which can't be compared with 0
+        c = colist["word:unset"]
+    else
+        c = colist["word:" .. (numbers[k] or "unset")] or colist["word:unknown"]
+    end
+    local v = c and function(n) setattr(n,a_color,c) end or false
+    if k ~= nil then
+        t[k] = v -- nil is not a valid table key, so it is never stored
+    end
+    return v
+end)
+
+-- method 1
+
+-- Method 1 callback: short words are left alone (false), other words get the
+-- "word:yes" or "word:no" marker depending on the outcome of words.found.
+local function sweep(language,str)
+    if #str < words.threshold then
+        return false
+    end
+    -- can become a local wordsfound, maybe variables.yes
+    local key = words.found(language,str) and "word:yes" or "word:no"
+    return cache[key]
+end
+
+-- Method 1: resets the color attribute of all nodes at the top level of the
+-- list and then marks the words depending on their presence in the loaded
+-- word lists.
+methods[1] = function(head)
+    -- the nuts accessors are fed with the converted head, just like
+    -- mark_words does for the same list
+    for n in traverse_nodes(tonut(head)) do
+        setattr(n,a_color,unsetvalue) -- hm, not that selective (reset color)
+    end
+    return mark_words(head,sweep)
+end
+
+-- method 2
+
+local dumpname             -- target file; when unset it is derived from the jobname
+local dumpthem = false     -- set as soon as method 2 has processed a list
+local listname = "document"
+
+local category, categories = { }, { } -- active category and all categories by name
+
+-- Categories are created on demand. Each category has a running total and a
+-- 'languages' table that in turn creates a record per language tag on first
+-- access (word list, totals and some data copied from the registered
+-- language).
+setmetatable(categories, {
+    __index = function(lists,name)
+        local perlanguage = { }
+        setmetatable(perlanguage, {
+            __index = function(records,tag)
+                local r = registered[tag]
+                local record = {
+                    -- this used to read an undefined global 'language' and
+                    -- therefore always was nil
+                    -- NOTE(review): assumes that registered entries carry a
+                    -- 'number' field; when they don't this stays nil as before
+                    number   = r and r.number   or nil,
+                    parent   = r and r.parent   or nil,
+                    patterns = r and r.patterns or nil,
+                    tag      = r and r.tag      or nil,
+                    list     = { },
+                    total    = 0,
+                    unique   = 0,
+                }
+                records[tag] = record
+                return record
+            end
+        } )
+        local entry = {
+            languages = perlanguage,
+            total     = 0,
+        }
+        lists[name] = entry
+        return entry
+    end
+} )
+
+-- Everything that method 2 collects; this table is what gets saved.
+local collected = {
+    categories = categories,
+    version    = 1.000,
+    total      = 0,
+}
+
+-- Enabler for method 2: selects the category that words are counted in,
+-- settings.list gives its name and "document" is used when it is empty.
+enablers[2] = function(settings)
+    local name = settings.list
+    if not name or name == "" then
+        name = "document"
+    end
+    listname = name
+    category = collected.categories[listname]
+end
+
+-- Method 2 callback: counts the lowercased word in the active category under
+-- the tag of its language ("unset" when the number is unknown). Nothing is
+-- returned, so no nodes get marked.
+local function sweep(language,str)
+    if #str < words.threshold then
+        return
+    end
+    str = lowerchar(str)
+    local record = category.languages[numbers[language] or "unset"]
+    local list   = record.list
+    local count  = list[str]
+    if count then
+        list[str] = count + 1
+    else
+        list[str] = 1
+        record.unique = record.unique + 1
+    end
+    collected.total = collected.total + 1
+    category.total  = category.total  + 1
+    record.total    = record.total    + 1
+end
+
+-- Method 2: collects word counts and flags that there is something to save
+-- at the end of the run.
+methods[2] = function(head)
+    dumpthem = true
+    local newhead, done = mark_words(head,sweep)
+    return newhead, done
+end
+
+-- Saves the collected words (serialized, including the threshold in use) when
+-- method 2 has been active. Without an explicit name the file is called
+-- after the job, with suffix "words".
+local function dumpusedwords()
+    if not dumpthem then
+        return
+    end
+    collected.threshold = words.threshold
+    if not dumpname then
+        dumpname = file.addsuffix(tex.jobname,"words")
+    end
+    report_words("saving list of used words in %a",dumpname)
+    io.savedata(dumpname,table.serialize(collected,true))
+end
+
+-- The directive sets the name of the dump file; anything but a non empty
+-- string resets it, so that the default name is used.
+directives.register("languages.words.dump", function(v)
+    if type(v) == "string" and v ~= "" then
+        dumpname = v
+    else
+        dumpname = false
+    end
+end)
+
+-- the dump itself is registered as a stop action
+luatex.registerstopactions(dumpusedwords)
+
+-- method 3
+
+-- Method 3 callback: every word gets the color that belongs to its language,
+-- the word itself plays no role.
+local function sweep(language,str)
+    return cache[language]
+end
+
+-- Method 3: resets the color attribute of all nodes at the top level of the
+-- list and then colors the words by language.
+methods[3] = function(head)
+    -- the nuts accessors are fed with the converted head, just like
+    -- mark_words does for the same list
+    for n in traverse_nodes(tonut(head)) do
+        setattr(n,a_color,unsetvalue)
+    end
+    return mark_words(head,sweep)
+end
+
+-- for the moment we hook it into the attribute handler
+
+-- languagehacks = { }
+
+-- function languagehacks.process(namespace,attribute,head)
+-- return languages.check(head)
+-- end
+
+-- chars.plugins[chars.plugins+1] = {
+-- name = "language",
+-- namespace = languagehacks,
+-- processor = languagehacks.process
+-- }
+
+-- interface
+
+-- enables checking; takes one set of settings with the keys "method" and "list"
+implement {
+    name      = "enablespellchecking",
+    arguments = { { { "method" }, { "list" } } },
+    actions   = words.enable,
+}
+
+-- disables checking
+implement {
+    name    = "disablespellchecking",
+    actions = words.disable,
+}
+
+-- loads a word list; the two strings are passed on to words.load (tag, filename)
+implement {
+    name      = "loadspellchecklist",
+    arguments = { "string", "string" },
+    actions   = words.load,
+}