diff options
author | Hans Hagen <pragma@wxs.nl> | 2010-10-21 14:02:00 +0200 |
---|---|---|
committer | Hans Hagen <pragma@wxs.nl> | 2010-10-21 14:02:00 +0200 |
commit | d28ab89ebb3382dccaf69fcc582f2fe4a1571dc0 (patch) | |
tree | f7141de9d6fc6511ecca0957a4c113bcf42e3b6e /tex/context/base/lang-wrd.lua | |
parent | 06c355066a4cf2af674302948c2f3caee06932f2 (diff) | |
download | context-d28ab89ebb3382dccaf69fcc582f2fe4a1571dc0.tar.gz |
beta 2010.10.21 14:02
Diffstat (limited to 'tex/context/base/lang-wrd.lua')
-rw-r--r-- | tex/context/base/lang-wrd.lua | 126 |
1 files changed, 86 insertions, 40 deletions
diff --git a/tex/context/base/lang-wrd.lua b/tex/context/base/lang-wrd.lua index 4d131f45a..7f7099c6d 100644 --- a/tex/context/base/lang-wrd.lua +++ b/tex/context/base/lang-wrd.lua @@ -22,6 +22,9 @@ words.data = words.data or { } words.enables = false words.threshold = 4 +local numbers = languages.numbers +local registered = languages.registered + local set_attribute = node.set_attribute local unset_attribute = node.unset_attribute local traverse_nodes = node.traverse @@ -40,11 +43,9 @@ local kerning_code = kerncodes.kerning local lowerchar = characters.lower local a_color = attributes.private('color') +local colist = attributes.list[a_color] -words.colors = { - ["known"] = "green", - ["unknown"] = "red", -} +local is_letter = characters.is_letter -- maybe is_character as variant local spacing = S(" \n\r\t") local markup = S("-=") @@ -86,7 +87,9 @@ end -- The following code is an adaption of experimental code for -- hyphenating and spell checking. -local function mark_words(head,whenfound) -- can be optimized +-- there is an n=1 problem somewhere in nested boxes + +local function mark_words(head,whenfound) -- can be optimized and shared local current, start, str, language, n, done = head, nil, "", nil, 0, false local function action() if #str > 0 then @@ -127,9 +130,10 @@ local function mark_words(head,whenfound) -- can be optimized local code = current.char local data = chardata[code] if data.uccode or data.lccode then +--~ if is_letter[code] then -- why does this fail start = start or current n = n + 1 - str = str .. utfchar(code) + str = str .. utfchar(code) -- slow, maybe str should be a table elseif start then action() end @@ -161,7 +165,7 @@ local wordmethod = 1 local enabled = false function words.check(head) - if enabled and head.next then + if enabled then return methods[wordmethod](head) else return head, false @@ -181,27 +185,39 @@ function words.disable() enabled = false end --- method 1 +-- colors + +local cache = { } -- can also be done with method 1 -- frozen colors once used -local colors = words.colors -local colist = attributes.list[a_color] +setmetatable(cache, { + __index = function(t,k) -- k == language, numbers[k] == tag + local c + if k < 0 then + c = colist["word:unset"] + else + c = colist["word:" .. (numbers[k] or "unset")] or colist["word:unknown"] + end + local v = c and function(n) set_attribute(n,a_color,c) end or false + t[k] = v + return v + end +} ) -local right = function(n) set_attribute(n,a_color,colist[colors.known]) end -local wrong = function(n) set_attribute(n,a_color,colist[colors.unknown]) end +-- method 1 local function sweep(language,str) if #str < words.threshold then return false - elseif words.found(language,str) then - return right + elseif words.found(language,str) then -- can become a local wordsfound + return cache["word:yes"] -- maybe variables.yes else - return wrong + return cache["word:no"] end end methods[1] = function(head) for n in traverse_nodes(head) do - unset_attribute(n,attribute) -- hm, not that selective (reset color) + unset_attribute(n,a_color) -- hm, not that selective (reset color) end return mark_words(head,sweep) end @@ -213,46 +229,63 @@ local dumpthem = false local listname = "document" local category = { } +local categories = { } + +setmetatable(categories, { + __index = function(t,k) + local languages = { } + setmetatable(languages, { + __index = function(t,k) + local r = registered[k] + local v = { + number = language, + parent = r and r.parent or nil, + patterns = r and r.patterns or nil, + tag = r and r.tag or nil, + list = { }, + total = 0, + unique = 0, + } + t[k] = v + return v + end + } ) + local v = { + languages = languages, + total = 0, + } + t[k] = v + return v + end +} ) local collected = { total = 0, - categories = { document = { total = 0, list = { } } }, + version = 1.000, + categories = categories, } enablers[2] = function(settings) local name = settings.list listname = name and name ~= "" and name or "document" category = collected.categories[listname] - if not category then - category = { } - collected.categories[listname] = category - end end -local numbers = languages.numbers -local registered = languages.registered - local function sweep(language,str) if #str >= words.threshold then - collected.total = collected.total + 1 str = lowerchar(str) - local number = numbers[language] or "unset" - local words = category[number] - if not words then - local r = registered[number] - category[number] = { - number = language, - parent = r and r.parent or nil, - patterns = r and r.patterns or nil, - tag = r and r.tag or nil, - list = { [str] = 1 }, - total = 1, - } + local words = category.languages[numbers[language] or "unset"] + local list = words.list + local ls = list[str] + if ls then + list[str] = ls + 1 else - local list = words.list - list[str] = (list[str] or 0) + 1 - words.total = words.total + 1 + list[str] = 1 + words.unique = words.unique + 1 end + collected.total = collected.total + 1 + category.total = category.total + 1 + words.total = words.total + 1 end end @@ -277,6 +310,19 @@ end) luatex.registerstopactions(dumpusedwords) +-- method 3 + +local function sweep(language,str) + return cache[language] +end + +methods[3] = function(head) + for n in traverse_nodes(head) do + unset_attribute(n,a_color) + end + return mark_words(head,sweep) +end + -- for the moment we hook it into the attribute handler --~ languagehacks = { } |