2016-01-12 16:26:00

author: Context Git Mirror Bot <phg42.2a@gmail.com> 2016-01-12 17:15:07 +0100
committer: Context Git Mirror Bot <phg42.2a@gmail.com> 2016-01-12 17:15:07 +0100
commit: 8d8d528d2ad52599f11250cfc567fea4f37f2a8b (patch)
tree: 94286bc131ef7d994f9432febaf03fe23d10eef8 /tex/context/base/mkiv/lang-wrd.lua
parent: f5aed2e51223c36c84c5f25a6cad238b2af59087 (diff)
download: context-8d8d528d2ad52599f11250cfc567fea4f37f2a8b.tar.gz
1 files changed, 387 insertions, 0 deletions
diff --git a/tex/context/base/mkiv/lang-wrd.lua b/tex/context/base/mkiv/lang-wrd.lua
new file mode 100644
index 000000000..b564a02ae
--- /dev/null
+++ b/tex/context/base/mkiv/lang-wrd.lua
@@ -0,0 +1,387 @@
+if not modules then modules = { } end modules ['lang-wrd'] = {
+    version   = 1.001,
+    comment   = "companion to lang-ini.mkiv",
+    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
+    copyright = "PRAGMA ADE / ConTeXt Development Team",
+    license   = "see context related readme files"
+}
+
+local lower = string.lower
+local utfchar = utf.char
+local concat = table.concat
+local lpegmatch = lpeg.match
+local P, S, Cs = lpeg.P, lpeg.S, lpeg.Cs
+
+local report_words = logs.reporter("languages","words")
+
+local nodes           = nodes
+local languages       = languages
+
+local implement       = interfaces.implement
+
+languages.words       = languages.words or { }
+local words           = languages.words
+
+words.data            = words.data or { }
+words.enables         = false
+words.threshold       = 4
+
+local numbers         = languages.numbers
+local registered      = languages.registered
+
+local nuts            = nodes.nuts
+local tonut           = nuts.tonut
+
+local getfield        = nuts.getfield
+local getnext         = nuts.getnext
+local getid           = nuts.getid
+local getsubtype      = nuts.getsubtype
+local getchar         = nuts.getchar
+local setattr         = nuts.setattr
+
+local traverse_nodes  = nuts.traverse
+
+local wordsdata       = words.data
+local chardata        = characters.data
+local tasks           = nodes.tasks
+
+local unsetvalue      = attributes.unsetvalue
+
+local nodecodes       = nodes.nodecodes
+local kerncodes       = nodes.kerncodes
+
+local glyph_code      = nodecodes.glyph
+local disc_code       = nodecodes.disc
+local kern_code       = nodecodes.kern
+
+local kerning_code    = kerncodes.kerning
+local lowerchar       = characters.lower
+
+local a_color         = attributes.private('color')
+local colist          = attributes.list[a_color]
+
+local is_letter       = characters.is_letter -- maybe is_character as variant
+
+-- Patterns used to parse a word list file: entries are separated by whitespace.
+local spacing = S(" \n\r\t")
+-- The characters "-" and "=" are stripped from an entry.
+local markup  = S("-=")
+local lbrace  = P("{")
+local rbrace  = P("}")
+-- One or more consecutive brace groups; these are stripped from an entry as well.
+local disc    = (lbrace * (1-rbrace)^0 * rbrace)^1 -- or just 3 times, time this
+-- A word is a run of non spacing characters with markup and brace groups removed.
+local word    = Cs((markup/"" + disc/"" + (1-spacing))^1)
+
+local loaded = { } -- we share lists (keyed by the resolved file name)
+
+-- Loads the word list found in filename and registers it under tag. A file is
+-- parsed only once: the resulting set is remembered by its resolved name and
+-- reused when the same file is requested again.
+function words.load(tag,filename)
+    local path = resolvers.findfile(filename,'other text file') or ""
+    if path == "" then
+        report_words("missing word file %a",filename)
+        return
+    end
+    report_words("loading word file %a",path)
+    statistics.starttiming(languages)
+    local set = loaded[path]
+    if not set then
+        -- words are added to the set that is already registered for this tag (if any)
+        set = wordsdata[tag] or { }
+        local function register(s)
+            set[s] = true
+        end
+        local content = io.loaddata(path) or ""
+        lpegmatch((spacing + word/register)^0,content)
+        loaded[path] = set
+    end
+    wordsdata[tag] = set
+    statistics.stoptiming(languages)
+end
+
+-- Looks up str in the word list that belongs to language number id.
+--
+-- Returns 1 for an exact match, 2 for a match after lowercasing and nothing when
+-- the language has no list or the word is not in it.
+function words.found(id, str)
+    local tag  = languages.numbers[id]
+    local data = tag and wordsdata[tag]
+    if not data then
+        return
+    end
+    if data[str] then
+        return 1
+    end
+    -- string.lower works bytewise and leaves non-ASCII capitals alone, so we also
+    -- try characters.lower, which is what method 2 applies to collected words
+    if data[lower(str)] or data[lowerchar(str)] then
+        return 2
+    end
+end
+
+-- The following code is an adaptation of experimental code for hyphenating and
+-- spell checking.
+
+-- there is an n=1 problem somewhere in nested boxes
+
+-- Scans the node list starting at head, gathers consecutive letter glyphs into
+-- words and hands each word to whenfound(language,word). When whenfound returns
+-- a function, that function is called on every node collected for the word.
+--
+-- Returns the unchanged head and a boolean that tells if any word was marked.
+local function mark_words(head,whenfound) -- can be optimized and shared
+    local current, language, done = tonut(head), nil, false
+    local str, s, nds, n = { }, 0, { }, 0 -- n could also be a table, saves calls
+    -- flushes the word collected so far (s characters, n nodes) and resets the counters
+    local function action()
+        if s > 0 then
+            local word = concat(str,"",1,s)
+            local mark = whenfound(language,word)
+            if mark then
+                done = true
+                for i=1,n do
+                    mark(nds[i])
+                end
+            end
+        end
+        n, s = 0, 0
+    end
+    while current do
+        local id = getid(current)
+        if id == glyph_code then
+            local a = getfield(current,"lang")
+            if a then
+                if a ~= language then
+                    -- a language switch ends the word collected so far
+                    if s > 0 then
+                        action()
+                    end
+                    language = a
+                end
+            elseif s > 0 then
+                action()
+                language = a
+            end
+            local components = getfield(current,"components")
+            if components then
+                -- one node is recorded but each component contributes a character
+                n = n + 1
+                nds[n] = current
+                for g in traverse_nodes(components) do
+                    s = s + 1
+                    str[s] = utfchar(getchar(g))
+                end
+            else
+                local code = getchar(current)
+                local data = chardata[code]
+                -- a character without an entry in chardata is treated as a non letter
+                if data and is_letter[data.category] then
+                    n = n + 1
+                    nds[n] = current
+                    s = s + 1
+                    str[s] = utfchar(code)
+                elseif s > 0 then
+                    action()
+                end
+            end
+        elseif id == disc_code then
+            -- inside a word the disc node itself is recorded so that it gets marked
+            -- too; its content is not added to the collected string
+            if n > 0 then
+                n = n + 1
+                nds[n] = current
+            end
+        elseif id == kern_code and getsubtype(current) == kerning_code and s > 0 then
+            -- ok, a font kern does not end a word
+        elseif s > 0 then
+            action()
+        end
+        current = getnext(current)
+    end
+    if s > 0 then
+        action()
+    end
+    return head, done
+end
+
+-- Checkers indexed by method number; each one takes a head and is called by words.check.
+local methods  = { }
+words.methods  = methods
+
+-- Optional setup functions, indexed by method number and called from words.enable.
+local enablers = { }
+words.enablers = enablers
+
+local wordmethod = 1     -- the currently selected method
+local enabled    = false -- toggled by words.enable and words.disable
+
+-- Runs the currently selected method on head when checking is enabled.
+--
+-- Returns whatever the method returns; when checking is disabled (or no method
+-- is registered under the selected number) the head is returned untouched
+-- together with false.
+function words.check(head)
+    if enabled then
+        local method = methods[wordmethod]
+        if method then
+            return method(head)
+        end
+    end
+    return head, false
+end
+
+-- Selects a method (settings.method, when it converts to a number), runs the
+-- enabler that belongs to it (if any) and switches checking on.
+function words.enable(settings)
+    local requested = settings.method
+    if requested then
+        requested = tonumber(requested)
+    end
+    -- fall back on the method that is already active, and finally on method 1
+    wordmethod = requested or wordmethod or 1
+    local enabler = enablers[wordmethod]
+    if enabler then
+        enabler(settings)
+    end
+    tasks.enableaction("processors","languages.words.check")
+    enabled = true
+end
+
+-- Switches checking off; words.check then returns the head untouched. The
+-- processor action that words.enable enabled is left as it is.
+function words.disable()
+    enabled = false
+end
+
+-- colors
+
+-- Resolves the color that belongs to a cache key: a string is looked up as is,
+-- a negative number maps onto "word:unset" and any other number onto the color
+-- named after the language tag, with "word:unknown" as fallback.
+local function resolvecolor(key)
+    if type(key) == "string" then
+        return colist[key]
+    end
+    if key < 0 then
+        return colist["word:unset"]
+    end
+    local tag = numbers[key] or "unset"
+    return colist["word:" .. tag] or colist["word:unknown"]
+end
+
+local cache = { } -- can also be done with method 1 -- frozen colors once used
+
+-- Each key lazily gets a function that sets the resolved color on a node, or
+-- false when there is no such color.
+table.setmetatableindex(cache, function(t,k) -- k == language, numbers[k] == tag
+    local color  = resolvecolor(k)
+    local setter = false
+    if color then
+        setter = function(n)
+            setattr(n,a_color,color)
+        end
+    end
+    t[k] = setter
+    return setter
+end)
+
+-- method 1
+
+-- Callback for method 1: words shorter than words.threshold (in bytes) are left
+-- alone, other words get the "word:yes" or "word:no" color setter depending on
+-- whether they are found in the list of their language.
+local function sweep(language,str)
+    if #str >= words.threshold then
+        if words.found(language,str) then -- can become a local wordsfound
+            return cache["word:yes"] -- maybe variables.yes
+        end
+        return cache["word:no"]
+    end
+    return false
+end
+
+-- Method 1: resets the color attribute on all nodes of the list and then colors
+-- words according to whether they are in the word list of their language.
+methods[1] = function(head)
+    -- traverse_nodes is nuts.traverse, so head is converted in the same way as
+    -- mark_words does it before it walks the list
+    for n in traverse_nodes(tonut(head)) do
+        setattr(n,a_color,unsetvalue) -- hm, not that selective (reset color)
+    end
+    return mark_words(head,sweep)
+end
+
+-- method 2
+
+local dumpname   = nil        -- target file, can be set by the languages.words.dump directive
+local dumpthem   = false      -- becomes true as soon as method 2 has been used
+local listname   = "document" -- name of the active category, set by enablers[2]
+
+local category   = { }        -- the active category, replaced by enablers[2]
+local categories = { }        -- all categories, entries are created on first access
+
+-- Categories are created on demand. Each category has a total count and a
+-- table with per language records that are created on demand as well, keyed by
+-- language tag (see the method 2 sweep function).
+setmetatable(categories, {
+    __index = function(t,k)
+        local perlanguage = { }
+        setmetatable(perlanguage, {
+            __index = function(records,tag)
+                local r = registered[tag]
+                local v = {
+                    -- this was 'language', which is an undefined global in this
+                    -- scope, so the field always ended up nil; assumes registered
+                    -- entries carry a number field -- TODO confirm
+                    number   = r and r.number   or nil,
+                    parent   = r and r.parent   or nil,
+                    patterns = r and r.patterns or nil,
+                    tag      = r and r.tag      or nil,
+                    list     = { },
+                    total    = 0,
+                    unique   = 0,
+                }
+                records[tag] = v
+                return v
+            end
+        } )
+        local v = {
+            languages = perlanguage,
+            total     = 0,
+        }
+        t[k] = v
+        return v
+    end
+} )
+
+-- The table that gets serialized by dumpusedwords: the overall word count plus
+-- the categories; the threshold is added just before saving.
+local collected  = {
+    total      = 0,
+    version    = 1.000,
+    categories = categories,
+}
+
+-- Enabler for method 2: selects the category that words are collected in, using
+-- "document" when settings.list is absent or empty.
+enablers[2] = function(settings)
+    local name = settings.list
+    if not name or name == "" then
+        name = "document"
+    end
+    listname = name
+    category = collected.categories[listname]
+end
+
+-- Callback for method 2: counts the lowercased word in the record of its
+-- language within the active category. Words shorter than words.threshold (in
+-- bytes) are skipped. Nothing is returned, so no nodes get marked.
+local function sweep(language,str)
+    if #str < words.threshold then
+        return
+    end
+    local key    = lowerchar(str)
+    local record = category.languages[numbers[language] or "unset"]
+    local list   = record.list
+    local count  = list[key]
+    if count then
+        list[key] = count + 1
+    else
+        -- first occurrence of this word
+        list[key] = 1
+        record.unique = record.unique + 1
+    end
+    collected.total = collected.total + 1
+    category.total  = category.total + 1
+    record.total    = record.total + 1
+end
+
+-- Method 2: collects the words that are used and flags that the collection has
+-- to be saved at the end of the run.
+methods[2] = function(head)
+    dumpthem = true
+    local newhead, done = mark_words(head,sweep)
+    return newhead, done
+end
+
+-- Saves the collected words, but only when method 2 has been used. The file
+-- name defaults to the jobname with suffix "words".
+local function dumpusedwords()
+    if not dumpthem then
+        return
+    end
+    collected.threshold = words.threshold
+    if not dumpname then
+        dumpname = file.addsuffix(tex.jobname,"words")
+    end
+    report_words("saving list of used words in %a",dumpname)
+    local serialized = table.serialize(collected,true)
+    io.savedata(dumpname,serialized)
+ -- table.tofile(dumpname,list,true)
+end
+
+-- A non empty string value sets the name of the dump file; any other value
+-- makes dumpname false so that dumpusedwords falls back on its default name.
+directives.register("languages.words.dump", function(v)
+    dumpname = type(v) == "string" and v ~= "" and v
+end)
+
+-- dumpusedwords itself checks whether there is anything to save
+luatex.registerstopactions(dumpusedwords)
+
+-- method 3
+
+-- Callback for method 3: the word itself is ignored, the color setter depends
+-- only on the language number (resolved and remembered by the cache).
+local function sweep(language,str)
+    return cache[language]
+end
+
+-- Method 3: resets the color attribute on all nodes of the list and then colors
+-- every word according to its language.
+methods[3] = function(head)
+    -- traverse_nodes is nuts.traverse, so head is converted in the same way as
+    -- mark_words does it before it walks the list
+    for n in traverse_nodes(tonut(head)) do
+        setattr(n,a_color,unsetvalue)
+    end
+    return mark_words(head,sweep)
+end
+
+-- for the moment we hook it into the attribute handler
+
+-- languagehacks = { }
+
+-- function languagehacks.process(namespace,attribute,head)
+--     return languages.check(head)
+-- end
+
+-- chars.plugins[chars.plugins+1] = {
+--     name = "language",
+--     namespace = languagehacks,
+--     processor = languagehacks.process
+-- }
+
+-- interface
+
+-- words.enable gets one table argument with the keys "method" and "list"
+implement {
+    name      = "enablespellchecking",
+    actions   = words.enable,
+    arguments = {
+        {
+            { "method" },
+            { "list" }
+        }
+    }
+}
+
+implement {
+    name      = "disablespellchecking",
+    actions   = words.disable
+}
+
+-- words.load gets two strings: the tag and the file name
+implement {
+    name      = "loadspellchecklist",
+    arguments = { "string", "string" },
+    actions   = words.load
+}
author	Context Git Mirror Bot <phg42.2a@gmail.com>	2016-01-12 17:15:07 +0100
committer	Context Git Mirror Bot <phg42.2a@gmail.com>	2016-01-12 17:15:07 +0100
commit	8d8d528d2ad52599f11250cfc567fea4f37f2a8b (patch)
tree	94286bc131ef7d994f9432febaf03fe23d10eef8 /tex/context/base/mkiv/lang-wrd.lua
parent	f5aed2e51223c36c84c5f25a6cad238b2af59087 (diff)
download	context-8d8d528d2ad52599f11250cfc567fea4f37f2a8b.tar.gz