summaryrefslogtreecommitdiff
path: root/tex/context/base/lang-wrd.lua
diff options
context:
space:
mode:
Diffstat (limited to 'tex/context/base/lang-wrd.lua')
-rw-r--r--tex/context/base/lang-wrd.lua706
1 files changed, 353 insertions, 353 deletions
diff --git a/tex/context/base/lang-wrd.lua b/tex/context/base/lang-wrd.lua
index 06a2311a6..6a9b39fdf 100644
--- a/tex/context/base/lang-wrd.lua
+++ b/tex/context/base/lang-wrd.lua
@@ -1,353 +1,353 @@
-if not modules then modules = { } end modules ['lang-wrd'] = {
- version = 1.001,
- comment = "companion to lang-ini.mkiv",
- author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
- copyright = "PRAGMA ADE / ConTeXt Development Team",
- license = "see context related readme files"
-}
-
-local lower = string.lower
-local utfchar = utf.char
-local concat = table.concat
-local lpegmatch = lpeg.match
-local P, S, Cs = lpeg.P, lpeg.S, lpeg.Cs
-
-local report_words = logs.reporter("languages","words")
-
-local nodes, node, languages = nodes, node, languages
-
-languages.words = languages.words or { }
-local words = languages.words
-
-words.data = words.data or { }
-words.enables = false
-words.threshold = 4
-
-local numbers = languages.numbers
-local registered = languages.registered
-
-local traverse_nodes = node.traverse
-local wordsdata = words.data
-local chardata = characters.data
-local tasks = nodes.tasks
-
-local unsetvalue = attributes.unsetvalue
-
-local nodecodes = nodes.nodecodes
-local kerncodes = nodes.kerncodes
-
-local glyph_code = nodecodes.glyph
-local disc_code = nodecodes.disc
-local kern_code = nodecodes.kern
-
-local kerning_code = kerncodes.kerning
-local lowerchar = characters.lower
-
-local a_color = attributes.private('color')
-local colist = attributes.list[a_color]
-
-local is_letter = characters.is_letter -- maybe is_character as variant
-
-local spacing = S(" \n\r\t")
-local markup = S("-=")
-local lbrace = P("{")
-local rbrace = P("}")
-local disc = (lbrace * (1-rbrace)^0 * rbrace)^1 -- or just 3 times, time this
-local word = Cs((markup/"" + disc/"" + (1-spacing))^1)
-
-local loaded = { } -- we share lists
-
-function words.load(tag,filename)
- local fullname = resolvers.findfile(filename,'other text file') or ""
- if fullname ~= "" then
- report_words("loading word file %a",fullname)
- statistics.starttiming(languages)
- local list = loaded[fullname]
- if not list then
- list = wordsdata[tag] or { }
- local parser = (spacing + word/function(s) list[s] = true end)^0
- lpegmatch(parser,io.loaddata(fullname) or "")
- loaded[fullname] = list
- end
- wordsdata[tag] = list
- statistics.stoptiming(languages)
- else
- report_words("missing word file %a",filename)
- end
-end
-
-function words.found(id, str)
- local tag = languages.numbers[id]
- if tag then
- local data = wordsdata[tag]
- if data then
- if data[str] then
- return 1
- elseif data[lower(str)] then
- return 2
- end
- end
- end
-end
-
--- The following code is an adaption of experimental code for hyphenating and
--- spell checking.
-
--- there is an n=1 problem somewhere in nested boxes
-
-local function mark_words(head,whenfound) -- can be optimized and shared
- local current, language, done = head, nil, nil, 0, false
- local str, s, nds, n = { }, 0, { }, 0 -- n could also be a table, saves calls
- local function action()
- if s > 0 then
- local word = concat(str,"",1,s)
- local mark = whenfound(language,word)
- if mark then
- done = true
- for i=1,n do
- mark(nds[i])
- end
- end
- end
- n, s = 0, 0
- end
- while current do
- local id = current.id
- if id == glyph_code then
- local a = current.lang
- if a then
- if a ~= language then
- if s > 0 then
- action()
- end
- language = a
- end
- elseif s > 0 then
- action()
- language = a
- end
- local components = current.components
- if components then
- n = n + 1
- nds[n] = current
- for g in traverse_nodes(components) do
- s = s + 1
- str[s] = utfchar(g.char)
- end
- else
- local code = current.char
- local data = chardata[code]
- if is_letter[data.category] then
- n = n + 1
- nds[n] = current
- s = s + 1
- str[s] = utfchar(code)
- elseif s > 0 then
- action()
- end
- end
- elseif id == disc_code then -- take the replace
- if n > 0 then
- n = n + 1
- nds[n] = current
- end
- elseif id == kern_code and current.subtype == kerning_code and s > 0 then
- -- ok
- elseif s > 0 then
- action()
- end
- current = current.next
- end
- if s > 0 then
- action()
- end
- return head, done
-end
-
-local methods = { }
-words.methods = methods
-
-local enablers = { }
-words.enablers = enablers
-
-local wordmethod = 1
-local enabled = false
-
-function words.check(head)
- if enabled then
- return methods[wordmethod](head)
- else
- return head, false
- end
-end
-
-function words.enable(settings)
- local method = settings.method
- wordmethod = method and tonumber(method) or wordmethod or 1
- local e = enablers[wordmethod]
- if e then e(settings) end
- tasks.enableaction("processors","languages.words.check")
- enabled = true
-end
-
-function words.disable()
- enabled = false
-end
-
--- colors
-
-local cache = { } -- can also be done with method 1 -- frozen colors once used
-
-table.setmetatableindex(cache, function(t,k) -- k == language, numbers[k] == tag
- local c
- if type(k) == "string" then
- c = colist[k]
- elseif k < 0 then
- c = colist["word:unset"]
- else
- c = colist["word:" .. (numbers[k] or "unset")] or colist["word:unknown"]
- end
- local v = c and function(n) n[a_color] = c end or false
- t[k] = v
- return v
-end)
-
--- method 1
-
-local function sweep(language,str)
- if #str < words.threshold then
- return false
- elseif words.found(language,str) then -- can become a local wordsfound
- return cache["word:yes"] -- maybe variables.yes
- else
- return cache["word:no"]
- end
-end
-
-methods[1] = function(head)
- for n in traverse_nodes(head) do
- n[a_color] = unsetvalue -- hm, not that selective (reset color)
- end
- return mark_words(head,sweep)
-end
-
--- method 2
-
-local dumpname = nil
-local dumpthem = false
-local listname = "document"
-
-local category = { }
-local categories = { }
-
-setmetatable(categories, {
- __index = function(t,k)
- local languages = { }
- setmetatable(languages, {
- __index = function(t,k)
- local r = registered[k]
- local v = {
- number = language,
- parent = r and r.parent or nil,
- patterns = r and r.patterns or nil,
- tag = r and r.tag or nil,
- list = { },
- total = 0,
- unique = 0,
- }
- t[k] = v
- return v
- end
- } )
- local v = {
- languages = languages,
- total = 0,
- }
- t[k] = v
- return v
- end
-} )
-
-local collected = {
- total = 0,
- version = 1.000,
- categories = categories,
-}
-
-enablers[2] = function(settings)
- local name = settings.list
- listname = name and name ~= "" and name or "document"
- category = collected.categories[listname]
-end
-
-local function sweep(language,str)
- if #str >= words.threshold then
- str = lowerchar(str)
- local words = category.languages[numbers[language] or "unset"]
- local list = words.list
- local ls = list[str]
- if ls then
- list[str] = ls + 1
- else
- list[str] = 1
- words.unique = words.unique + 1
- end
- collected.total = collected.total + 1
- category.total = category.total + 1
- words.total = words.total + 1
- end
-end
-
-methods[2] = function(head)
- dumpthem = true
- return mark_words(head,sweep)
-end
-
-local function dumpusedwords()
- if dumpthem then
- collected.threshold = words.threshold
- dumpname = dumpname or file.addsuffix(tex.jobname,"words")
- report_words("saving list of used words in %a",dumpname)
- io.savedata(dumpname,table.serialize(collected,true))
- -- table.tofile(dumpname,list,true)
- end
-end
-
-directives.register("languages.words.dump", function(v)
- dumpname = type(v) == "string" and v ~= "" and v
-end)
-
-luatex.registerstopactions(dumpusedwords)
-
--- method 3
-
-local function sweep(language,str)
- return cache[language]
-end
-
-methods[3] = function(head)
- for n in traverse_nodes(head) do
- n[a_color] = unsetvalue
- end
- return mark_words(head,sweep)
-end
-
--- for the moment we hook it into the attribute handler
-
---~ languagehacks = { }
-
---~ function languagehacks.process(namespace,attribute,head)
---~ return languages.check(head)
---~ end
-
---~ chars.plugins[chars.plugins+1] = {
---~ name = "language",
---~ namespace = languagehacks,
---~ processor = languagehacks.process
---~ }
-
--- interface
-
-commands.enablespellchecking = words.enable
-commands.disablespellchecking = words.disable
-commands.loadspellchecklist = words.load
+if not modules then modules = { } end modules ['lang-wrd'] = {
+ version = 1.001,
+ comment = "companion to lang-ini.mkiv",
+ author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
+ copyright = "PRAGMA ADE / ConTeXt Development Team",
+ license = "see context related readme files"
+}
+
+local lower = string.lower
+local utfchar = utf.char
+local concat = table.concat
+local lpegmatch = lpeg.match
+local P, S, Cs = lpeg.P, lpeg.S, lpeg.Cs
+
+local report_words = logs.reporter("languages","words")
+
+local nodes, node, languages = nodes, node, languages
+
+languages.words = languages.words or { }
+local words = languages.words
+
+words.data = words.data or { }
+words.enables = false
+words.threshold = 4
+
+local numbers = languages.numbers
+local registered = languages.registered
+
+local traverse_nodes = node.traverse
+local wordsdata = words.data
+local chardata = characters.data
+local tasks = nodes.tasks
+
+local unsetvalue = attributes.unsetvalue
+
+local nodecodes = nodes.nodecodes
+local kerncodes = nodes.kerncodes
+
+local glyph_code = nodecodes.glyph
+local disc_code = nodecodes.disc
+local kern_code = nodecodes.kern
+
+local kerning_code = kerncodes.kerning
+local lowerchar = characters.lower
+
+local a_color = attributes.private('color')
+local colist = attributes.list[a_color]
+
+local is_letter = characters.is_letter -- maybe is_character as variant
+
+local spacing = S(" \n\r\t")
+local markup = S("-=")
+local lbrace = P("{")
+local rbrace = P("}")
+local disc = (lbrace * (1-rbrace)^0 * rbrace)^1 -- or just 3 times, time this
+local word = Cs((markup/"" + disc/"" + (1-spacing))^1)
+
+local loaded = { } -- we share lists
+
+function words.load(tag,filename)
+ local fullname = resolvers.findfile(filename,'other text file') or ""
+ if fullname ~= "" then
+ report_words("loading word file %a",fullname)
+ statistics.starttiming(languages)
+ local list = loaded[fullname]
+ if not list then
+ list = wordsdata[tag] or { }
+ local parser = (spacing + word/function(s) list[s] = true end)^0
+ lpegmatch(parser,io.loaddata(fullname) or "")
+ loaded[fullname] = list
+ end
+ wordsdata[tag] = list
+ statistics.stoptiming(languages)
+ else
+ report_words("missing word file %a",filename)
+ end
+end
+
+function words.found(id, str)
+ local tag = languages.numbers[id]
+ if tag then
+ local data = wordsdata[tag]
+ if data then
+ if data[str] then
+ return 1
+ elseif data[lower(str)] then
+ return 2
+ end
+ end
+ end
+end
+
+-- The following code is an adaption of experimental code for hyphenating and
+-- spell checking.
+
+-- there is an n=1 problem somewhere in nested boxes
+
+local function mark_words(head,whenfound) -- can be optimized and shared
+ local current, language, done = head, nil, nil, 0, false
+ local str, s, nds, n = { }, 0, { }, 0 -- n could also be a table, saves calls
+ local function action()
+ if s > 0 then
+ local word = concat(str,"",1,s)
+ local mark = whenfound(language,word)
+ if mark then
+ done = true
+ for i=1,n do
+ mark(nds[i])
+ end
+ end
+ end
+ n, s = 0, 0
+ end
+ while current do
+ local id = current.id
+ if id == glyph_code then
+ local a = current.lang
+ if a then
+ if a ~= language then
+ if s > 0 then
+ action()
+ end
+ language = a
+ end
+ elseif s > 0 then
+ action()
+ language = a
+ end
+ local components = current.components
+ if components then
+ n = n + 1
+ nds[n] = current
+ for g in traverse_nodes(components) do
+ s = s + 1
+ str[s] = utfchar(g.char)
+ end
+ else
+ local code = current.char
+ local data = chardata[code]
+ if is_letter[data.category] then
+ n = n + 1
+ nds[n] = current
+ s = s + 1
+ str[s] = utfchar(code)
+ elseif s > 0 then
+ action()
+ end
+ end
+ elseif id == disc_code then -- take the replace
+ if n > 0 then
+ n = n + 1
+ nds[n] = current
+ end
+ elseif id == kern_code and current.subtype == kerning_code and s > 0 then
+ -- ok
+ elseif s > 0 then
+ action()
+ end
+ current = current.next
+ end
+ if s > 0 then
+ action()
+ end
+ return head, done
+end
+
+local methods = { }
+words.methods = methods
+
+local enablers = { }
+words.enablers = enablers
+
+local wordmethod = 1
+local enabled = false
+
+function words.check(head)
+ if enabled then
+ return methods[wordmethod](head)
+ else
+ return head, false
+ end
+end
+
+function words.enable(settings)
+ local method = settings.method
+ wordmethod = method and tonumber(method) or wordmethod or 1
+ local e = enablers[wordmethod]
+ if e then e(settings) end
+ tasks.enableaction("processors","languages.words.check")
+ enabled = true
+end
+
+function words.disable()
+ enabled = false
+end
+
+-- colors
+
+local cache = { } -- can also be done with method 1 -- frozen colors once used
+
+table.setmetatableindex(cache, function(t,k) -- k == language, numbers[k] == tag
+ local c
+ if type(k) == "string" then
+ c = colist[k]
+ elseif k < 0 then
+ c = colist["word:unset"]
+ else
+ c = colist["word:" .. (numbers[k] or "unset")] or colist["word:unknown"]
+ end
+ local v = c and function(n) n[a_color] = c end or false
+ t[k] = v
+ return v
+end)
+
+-- method 1
+
+local function sweep(language,str)
+ if #str < words.threshold then
+ return false
+ elseif words.found(language,str) then -- can become a local wordsfound
+ return cache["word:yes"] -- maybe variables.yes
+ else
+ return cache["word:no"]
+ end
+end
+
+methods[1] = function(head)
+ for n in traverse_nodes(head) do
+ n[a_color] = unsetvalue -- hm, not that selective (reset color)
+ end
+ return mark_words(head,sweep)
+end
+
+-- method 2
+
+local dumpname = nil
+local dumpthem = false
+local listname = "document"
+
+local category = { }
+local categories = { }
+
+setmetatable(categories, {
+ __index = function(t,k)
+ local languages = { }
+ setmetatable(languages, {
+ __index = function(t,k)
+ local r = registered[k]
+ local v = {
+ number = language,
+ parent = r and r.parent or nil,
+ patterns = r and r.patterns or nil,
+ tag = r and r.tag or nil,
+ list = { },
+ total = 0,
+ unique = 0,
+ }
+ t[k] = v
+ return v
+ end
+ } )
+ local v = {
+ languages = languages,
+ total = 0,
+ }
+ t[k] = v
+ return v
+ end
+} )
+
+local collected = {
+ total = 0,
+ version = 1.000,
+ categories = categories,
+}
+
+enablers[2] = function(settings)
+ local name = settings.list
+ listname = name and name ~= "" and name or "document"
+ category = collected.categories[listname]
+end
+
+local function sweep(language,str)
+ if #str >= words.threshold then
+ str = lowerchar(str)
+ local words = category.languages[numbers[language] or "unset"]
+ local list = words.list
+ local ls = list[str]
+ if ls then
+ list[str] = ls + 1
+ else
+ list[str] = 1
+ words.unique = words.unique + 1
+ end
+ collected.total = collected.total + 1
+ category.total = category.total + 1
+ words.total = words.total + 1
+ end
+end
+
+methods[2] = function(head)
+ dumpthem = true
+ return mark_words(head,sweep)
+end
+
+local function dumpusedwords()
+ if dumpthem then
+ collected.threshold = words.threshold
+ dumpname = dumpname or file.addsuffix(tex.jobname,"words")
+ report_words("saving list of used words in %a",dumpname)
+ io.savedata(dumpname,table.serialize(collected,true))
+ -- table.tofile(dumpname,list,true)
+ end
+end
+
+directives.register("languages.words.dump", function(v)
+ dumpname = type(v) == "string" and v ~= "" and v
+end)
+
+luatex.registerstopactions(dumpusedwords)
+
+-- method 3
+
+local function sweep(language,str)
+ return cache[language]
+end
+
+methods[3] = function(head)
+ for n in traverse_nodes(head) do
+ n[a_color] = unsetvalue
+ end
+ return mark_words(head,sweep)
+end
+
+-- for the moment we hook it into the attribute handler
+
+--~ languagehacks = { }
+
+--~ function languagehacks.process(namespace,attribute,head)
+--~ return languages.check(head)
+--~ end
+
+--~ chars.plugins[chars.plugins+1] = {
+--~ name = "language",
+--~ namespace = languagehacks,
+--~ processor = languagehacks.process
+--~ }
+
+-- interface
+
+commands.enablespellchecking = words.enable
+commands.disablespellchecking = words.disable
+commands.loadspellchecklist = words.load