diff options
author | Hans Hagen <pragma@wxs.nl> | 2010-10-21 14:02:00 +0200 |
---|---|---|
committer | Hans Hagen <pragma@wxs.nl> | 2010-10-21 14:02:00 +0200 |
commit | d28ab89ebb3382dccaf69fcc582f2fe4a1571dc0 (patch) | |
tree | f7141de9d6fc6511ecca0957a4c113bcf42e3b6e /tex | |
parent | 06c355066a4cf2af674302948c2f3caee06932f2 (diff) | |
download | context-d28ab89ebb3382dccaf69fcc582f2fe4a1571dc0.tar.gz |
beta 2010.10.21 14:02
Diffstat (limited to 'tex')
-rw-r--r-- | tex/context/base/context.mkiv | 3 | ||||
-rw-r--r-- | tex/context/base/font-clr.lua | 2 | ||||
-rw-r--r-- | tex/context/base/l-table.lua | 22 | ||||
-rw-r--r-- | tex/context/base/lang-wrd.lua | 126 | ||||
-rw-r--r-- | tex/context/base/lang-wrd.mkiv | 13 | ||||
-rw-r--r-- | tex/context/base/s-lan-03.tex | 40 | ||||
-rw-r--r-- | tex/context/base/strc-reg.lua | 332 | ||||
-rw-r--r-- | tex/generic/context/luatex-fonts-merged.lua | 22 |
8 files changed, 333 insertions, 227 deletions
diff --git a/tex/context/base/context.mkiv b/tex/context/base/context.mkiv index 89226a9a9..754b518f8 100644 --- a/tex/context/base/context.mkiv +++ b/tex/context/base/context.mkiv @@ -123,7 +123,6 @@ \loadmarkfile{lang-ini} \loadmarkfile{lang-lab} -\loadmarkfile{lang-wrd} \loadmarkfile{unic-ini} @@ -156,6 +155,8 @@ \loadmarkfile{lang-ara} \loadmarkfile{lang-cjk} +\loadmarkfile{lang-wrd} % can be optional + \loadmarkfile{symb-ini} \loadmarkfile{sort-ini} diff --git a/tex/context/base/font-clr.lua b/tex/context/base/font-clr.lua index 3733aaf68..7d19aadd5 100644 --- a/tex/context/base/font-clr.lua +++ b/tex/context/base/font-clr.lua @@ -6,7 +6,7 @@ if not modules then modules = { } end modules ['font-clr'] = { license = "see context related readme files" } --- moved from ini: +-- moved from ini ... will become inline fonts.colors = fonts.colors or { } -- dummy in ini local colors = fonts.colors diff --git a/tex/context/base/l-table.lua b/tex/context/base/l-table.lua index 4be077dfa..e165acba6 100644 --- a/tex/context/base/l-table.lua +++ b/tex/context/base/l-table.lua @@ -129,15 +129,21 @@ end table.sortedkeys = sortedkeys table.sortedhashkeys = sortedhashkeys +local function nothing() end + local function sortedhash(t) - local s = sortedhashkeys(t) -- maybe just sortedkeys - local n = 0 - local function kv(s) - n = n + 1 - local k = s[n] - return k, t[k] - end - return kv, s + if t then + local s = sortedhashkeys(t) -- maybe just sortedkeys + local n = 0 + local function kv(s) + n = n + 1 + local k = s[n] + return k, t[k] + end + return kv, s + else + return nothing + end end table.sortedhash = sortedhash diff --git a/tex/context/base/lang-wrd.lua b/tex/context/base/lang-wrd.lua index 4d131f45a..7f7099c6d 100644 --- a/tex/context/base/lang-wrd.lua +++ b/tex/context/base/lang-wrd.lua @@ -22,6 +22,9 @@ words.data = words.data or { } words.enables = false words.threshold = 4 +local numbers = languages.numbers +local registered = languages.registered + local set_attribute = node.set_attribute local unset_attribute = node.unset_attribute local traverse_nodes = node.traverse @@ -40,11 +43,9 @@ local kerning_code = kerncodes.kerning local lowerchar = characters.lower local a_color = attributes.private('color') +local colist = attributes.list[a_color] -words.colors = { - ["known"] = "green", - ["unknown"] = "red", -} +local is_letter = characters.is_letter -- maybe is_character as variant local spacing = S(" \n\r\t") local markup = S("-=") @@ -86,7 +87,9 @@ end -- The following code is an adaption of experimental code for -- hyphenating and spell checking. -local function mark_words(head,whenfound) -- can be optimized +-- there is an n=1 problem somewhere in nested boxes + +local function mark_words(head,whenfound) -- can be optimized and shared local current, start, str, language, n, done = head, nil, "", nil, 0, false local function action() if #str > 0 then @@ -127,9 +130,10 @@ local function mark_words(head,whenfound) -- can be optimized local code = current.char local data = chardata[code] if data.uccode or data.lccode then +--~ if is_letter[code] then -- why does this fail start = start or current n = n + 1 - str = str .. utfchar(code) + str = str .. utfchar(code) -- slow, maybe str should be a table elseif start then action() end @@ -161,7 +165,7 @@ local wordmethod = 1 local enabled = false function words.check(head) - if enabled and head.next then + if enabled then return methods[wordmethod](head) else return head, false @@ -181,27 +185,39 @@ function words.disable() enabled = false end --- method 1 +-- colors + +local cache = { } -- can also be done with method 1 -- frozen colors once used -local colors = words.colors -local colist = attributes.list[a_color] +setmetatable(cache, { + __index = function(t,k) -- k == language, numbers[k] == tag + local c + if k < 0 then + c = colist["word:unset"] + else + c = colist["word:" .. (numbers[k] or "unset")] or colist["word:unknown"] + end + local v = c and function(n) set_attribute(n,a_color,c) end or false + t[k] = v + return v + end +} ) -local right = function(n) set_attribute(n,a_color,colist[colors.known]) end -local wrong = function(n) set_attribute(n,a_color,colist[colors.unknown]) end +-- method 1 local function sweep(language,str) if #str < words.threshold then return false - elseif words.found(language,str) then - return right + elseif words.found(language,str) then -- can become a local wordsfound + return cache["word:yes"] -- maybe variables.yes else - return wrong + return cache["word:no"] end end methods[1] = function(head) for n in traverse_nodes(head) do - unset_attribute(n,attribute) -- hm, not that selective (reset color) + unset_attribute(n,a_color) -- hm, not that selective (reset color) end return mark_words(head,sweep) end @@ -213,46 +229,63 @@ local dumpthem = false local listname = "document" local category = { } +local categories = { } + +setmetatable(categories, { + __index = function(t,k) + local languages = { } + setmetatable(languages, { + __index = function(t,k) + local r = registered[k] + local v = { + number = language, + parent = r and r.parent or nil, + patterns = r and r.patterns or nil, + tag = r and r.tag or nil, + list = { }, + total = 0, + unique = 0, + } + t[k] = v + return v + end + } ) + local v = { + languages = languages, + total = 0, + } + t[k] = v + return v + end +} ) local collected = { total = 0, - categories = { document = { total = 0, list = { } } }, + version = 1.000, + categories = categories, } enablers[2] = function(settings) local name = settings.list listname = name and name ~= "" and name or "document" category = collected.categories[listname] - if not category then - category = { } - collected.categories[listname] = category - end end -local numbers = languages.numbers -local registered = languages.registered - local function sweep(language,str) if #str >= words.threshold then - collected.total = collected.total + 1 str = lowerchar(str) - local number = numbers[language] or "unset" - local words = category[number] - if not words then - local r = registered[number] - category[number] = { - number = language, - parent = r and r.parent or nil, - patterns = r and r.patterns or nil, - tag = r and r.tag or nil, - list = { [str] = 1 }, - total = 1, - } + local words = category.languages[numbers[language] or "unset"] + local list = words.list + local ls = list[str] + if ls then + list[str] = ls + 1 else - local list = words.list - list[str] = (list[str] or 0) + 1 - words.total = words.total + 1 + list[str] = 1 + words.unique = words.unique + 1 end + collected.total = collected.total + 1 + category.total = category.total + 1 + words.total = words.total + 1 end end @@ -277,6 +310,19 @@ end) luatex.registerstopactions(dumpusedwords) +-- method 3 + +local function sweep(language,str) + return cache[language] +end + +methods[3] = function(head) + for n in traverse_nodes(head) do + unset_attribute(n,a_color) + end + return mark_words(head,sweep) +end + -- for the moment we hook it into the attribute handler --~ languagehacks = { } diff --git a/tex/context/base/lang-wrd.mkiv b/tex/context/base/lang-wrd.mkiv index 9b149462a..ad353905f 100644 --- a/tex/context/base/lang-wrd.mkiv +++ b/tex/context/base/lang-wrd.mkiv @@ -51,9 +51,22 @@ % beware, maybe some day we will honour grouping +% 1: spell checking +% 2: word counting +% 3: language coloring + \setupspellchecking [\c!state=\v!stop, \c!method=1, \c!list=] +\definecolor[word:yes] [darkgreen] +\definecolor[word:no] [darkred] + +%definecolor[word:unset] [darkgray] +\definecolor[word:en] [b=.75] +\definecolor[word:de] [r=.75] +\definecolor[word:nl] [g=.75] +\definecolor[word:unknown][r=.75,g=.75] + \protect \endinput diff --git a/tex/context/base/s-lan-03.tex b/tex/context/base/s-lan-03.tex new file mode 100644 index 000000000..b8d24539b --- /dev/null +++ b/tex/context/base/s-lan-03.tex @@ -0,0 +1,40 @@ +%D \module +%D [ file=s-lan-03, +%D version=2010.10.21, +%D title=\CONTEXT\ Style File, +%D subtitle=Language Environment 3, +%D author=Hans Hagen, +%D date=\currentdate, +%D copyright={PRAGMA / Hans Hagen \& Ton Otten}] +%C +%C This module is part of the \CONTEXT\ macro||package and is +%C therefore copyrighted by \PRAGMA. See mreadme.pdf for +%C details. + +\startluacode +languages.words.tracers = languages.words.tracers or { } + +function languages.words.tracers.showwords(filename) + filename = filename or file.addsuffix(tex.jobname,"words") + if lfs.isfile(filename) then + local w = dofile(filename) + if w then + -- table.print(w) + for cname, category in table.sortedpairs(w.categories) do + for lname, language in table.sortedpairs(category.languages) do + context.bold(string.format("category: %s, language: %s, total: %s, unique: %s:", + cname, lname, language.total or 0, language.unique or 0) + ) + for word, n in table.sortedpairs(language.list) do + context(" %s (%s)",word,n) + end + context.par() + end + end + end + end +end +\stopluacode + +% \ctxlua{languages.words.tracers.showwords("words-003.words")} + diff --git a/tex/context/base/strc-reg.lua b/tex/context/base/strc-reg.lua index c5519141f..1ea285a90 100644 --- a/tex/context/base/strc-reg.lua +++ b/tex/context/base/strc-reg.lua @@ -8,7 +8,7 @@ if not modules then modules = { } end modules ['strc-reg'] = { local next, type = next, type local texwrite, texcount = tex.write, tex.count -local format, gmatch, concat = string.format, string.gmatch, table.concat +local format, gmatch, concat, remove = string.format, string.gmatch, table.concat, table.remove local utfchar = utf.char local lpegmatch = lpeg.match local allocate, mark = utilities.storage.allocate, utilities.storage.mark @@ -333,6 +333,7 @@ function registers.compare(a,b) elseif page_a > page_b then return 1 end + else -- see end return 0 end @@ -448,94 +449,161 @@ function registers.userdata(index,name) end end --- proc can be wrapped +-- todo: ownnumber local seeindex = 0 -function registers.flush(data,options,prefixspec,pagespec) - local equal = table.are_equal - -- local usedtags = { } - -- for i=1,#result do - -- usedtags[#usedtags+1] = result[i].tag - -- end - -- context.setvalue("usedregistertags",concat(usedtags,",")) -- todo: { } and escape special chars - -- - context.startregisteroutput() - local collapse_singles = options.compress == interfaces.variables.yes - local collapse_ranges = options.compress == interfaces.variables.all - local result = data.result - -- todo ownnumber - local function pagenumber(entry) - local er = entry.references - context.registeronepage( - entry.processors and entry.processors[2] or "", - er.internal or 0, - er.realpage or 0, - function() helpers.prefixpage(entry,prefixspec,pagespec) end - ) - end - local function pagerange(f_entry,t_entry,is_last) - local fer = f_entry.references - local ter = t_entry.references - context.registerpagerange( - f_entry.processors and f_entry.processors[2] or "", - fer.internal or 0, - fer.realpage or 0, - function() - helpers.prefixpage(f_entry,prefixspec,pagespec) - end, - ter.internal or 0, - ter.lastrealpage or ter.realpage or 0, - function() - if is_last then - helpers.prefixlastpage(t_entry,prefixspec,pagespec) -- swaps page and realpage keys - else - helpers.prefixpage (t_entry,prefixspec,pagespec) - end +local function pagerange(f_entry,t_entry,is_last,prefixspec,pagespec) + local fer, ter = f_entry.references, t_entry.references + context.registerpagerange( + f_entry.processors and f_entry.processors[2] or "", + fer.internal or 0, + fer.realpage or 0, + function() + helpers.prefixpage(f_entry,prefixspec,pagespec) + end, + ter.internal or 0, + ter.lastrealpage or ter.realpage or 0, + function() + if is_last then + helpers.prefixlastpage(t_entry,prefixspec,pagespec) -- swaps page and realpage keys + else + helpers.prefixpage (t_entry,prefixspec,pagespec) end - ) - end - -- - -- maybe we can nil the splits and save memory - -- - do - -- hash words (potential see destinations) - local words = { } - for i=1,#result do - local data = result[i].data - for j=1,#data do - local d = data[j] - local word = d.list[1][1] - words[word] = d + end + ) +end + +local function pagenumber(entry,prefixspec,pagespec) + local er = entry.references + context.registeronepage( + entry.processors and entry.processors[2] or "", + er.internal or 0, + er.realpage or 0, + function() helpers.prefixpage(entry,prefixspec,pagespec) end + ) +end + +-- local usedtags = { } +-- for i=1,#result do +-- usedtags[#usedtags+1] = result[i].tag +-- end +-- context.setvalue("usedregistertags",concat(usedtags,",")) -- todo: { } and escape special chars + +--~ local function remove(pages,i) -- todo: use table.remove(pages,i) +--~ for j=i,#pages-1 do +--~ pages[j] = pages[j+1] +--~ end +--~ pages[#pages] = nil +--~ end + +local function collapsedpage(pages) + for i=2,#pages do + local first, second = pages[i-1], pages[i] + local first_first, first_last, second_first, second_last = first[1], first[2], second[1], second[2] + local first_last_pn = first_last .references.realpage + local second_first_pn = second_first.references.realpage + local second_last_pn = second_last .references.realpage + local first_last_last = first_last .references.lastrealpage + local second_first_last = second_first.references.lastrealpage + if first_last_last then + first_last_pn = first_last_last + if second_first == second_last and second_first_pn <= first_last_pn then + -- 2=8, 5 -> 12=8 + remove(pages,i) + return true + elseif second_first == second_last and second_first_pn > first_last_pn then + -- 2=8, 9 -> 2-9 + pages[i-1] = { first_first, second_last } + remove(pages,i) + return true + elseif second_last_pn < first_last_pn then + -- 2=8, 3-4 -> 2=8 + remove(pages,i) + return true + elseif first_last_pn < second_last_pn then + -- 2=8, 3-9 -> 2-9 + pages[i-1] = { first_first, second_last } + remove(pages,i) + return true + elseif first_last_pn + 1 == second_first_pn and second_last_pn > first_last_pn then + -- 2=8, 9-11 -> 2-11 + pages[i-1] = { first_first, second_last } + remove(pages,i) + return true + elseif second_first.references.lastrealpage then + -- 2=8, 9=11 -> 2-11 + pages[i-1] = { first_first, second_last } + remove(pages,i) + return true end + elseif second_first_last then + second_first_pn = second_first_last + if first_last_pn == second_first_pn then + -- 2-4, 5=9 -> 2-9 + pages[i-1] = { first_first, second_last } + remove(pages,i) + return true + end + elseif first_last_pn == second_first_pn then + -- 2-3, 3-4 -> 2-4 + pages[i-1] = { first_last, second_last } + remove(pages,i) + return true end - -- link seewords to words and tag destination - for i=1,#result do - local data = result[i].data - for j=1,#data do - local d = data[j] - local seeword = d.seeword - if seeword then - local text = seeword.text - if text then - local w = words[text] - if w then - local wr = w.references -- the referred word - local dr = d.references -- the see word - if wr.seeparent then - dr.seeindex = wr.seeparent - else - seeindex = seeindex + 1 - wr.seeparent = seeindex - dr.seeindex = seeindex - end + end + return false +end + +function collapsepages(pages) + while collapsedpage(pages) do end +end + +local function crosslinkseewords(result) + -- hash words (potential see destinations) + local words = { } + for i=1,#result do + local data = result[i].data + for j=1,#data do + local d = data[j] + local word = d.list[1][1] + words[word] = d + end + end + -- link seewords to words and tag destination + for i=1,#result do + local data = result[i].data + for j=1,#data do + local d = data[j] + local seeword = d.seeword + if seeword then + local text = seeword.text + if text then + local w = words[text] + if w then + local wr = w.references -- the referred word + local dr = d.references -- the see word + if wr.seeparent then + dr.seeindex = wr.seeparent + else + seeindex = seeindex + 1 + wr.seeparent = seeindex + dr.seeindex = seeindex end end end end end end - -- +end + +function registers.flush(data,options,prefixspec,pagespec) + local equal = table.are_equal + local collapse_singles = options.compress == variables.yes + local collapse_ranges = options.compress == variables.all + local result = data.result + crosslinkseewords(result) + context.startregisteroutput() for i=1,#result do -- ranges need checking ! local sublist = result[i] @@ -568,7 +636,7 @@ function registers.flush(data,options,prefixspec,pagespec) context.startregisterentries(n) end end - local internal = entry.references.internal or 0 + local internal = entry.references.internal or 0 local seeparent = entry.references.seeparent or "" local processor = entry.processors and entry.processors[1] or "" if metadata then @@ -588,9 +656,7 @@ function registers.flush(data,options,prefixspec,pagespec) if collapse_singles or collapse_ranges then -- we collapse ranges and keep existing ranges as they are -- so we get prebuilt as well as built ranges - local first, last, prev = entry, nil, entry - local pages = { } - local dd = d + local first, last, prev, pages, dd = entry, nil, entry, { }, d while dd < #data do dd = dd + 1 local next = data[dd] @@ -600,11 +666,7 @@ function registers.flush(data,options,prefixspec,pagespec) --~ first = nil break elseif next.references.lastrealpage then - if first then - pages[#pages+1] = { first, last or first } - else - pages[#pages+1] = { entry, entry } - end + pages[#pages+1] = first and { first, last or first } or { entry, entry } pages[#pages+1] = { next, next } first, last, prev = nil, nil, nil elseif not first then @@ -620,102 +682,35 @@ function registers.flush(data,options,prefixspec,pagespec) pages[#pages+1] = { first, last or first } end if collapse_ranges and #pages > 1 then - -- ok, not that efficient - local function doit() - local function bubble(i) - for j=i,#pages-1 do - pages[j] = pages[j+1] - end - pages[#pages] = nil - end - for i=2,#pages do - local first, second = pages[i-1], pages[i] - local first_first, first_last, second_first, second_last = first[1], first[2], second[1], second[2] - local first_last_pn = first_last .references.realpage - local second_first_pn = second_first.references.realpage - local second_last_pn = second_last .references.realpage - local first_last_last = first_last .references.lastrealpage - local second_first_last = second_first.references.lastrealpage - if first_last_last then - first_last_pn = first_last_last - if second_first == second_last and second_first_pn <= first_last_pn then - -- 2=8, 5 -> 12=8 - bubble(i) - return true - elseif second_first == second_last and second_first_pn > first_last_pn then - -- 2=8, 9 -> 2-9 - pages[i-1] = { first_first, second_last } - bubble(i) - return true - elseif second_last_pn < first_last_pn then - -- 2=8, 3-4 -> 2=8 - bubble(i) - return true - elseif first_last_pn < second_last_pn then - -- 2=8, 3-9 -> 2-9 - pages[i-1] = { first_first, second_last } - bubble(i) - return true - elseif first_last_pn + 1 == second_first_pn and second_last_pn > first_last_pn then - -- 2=8, 9-11 -> 2-11 - pages[i-1] = { first_first, second_last } - bubble(i) - return true - elseif second_first.references.lastrealpage then - -- 2=8, 9=11 -> 2-11 - pages[i-1] = { first_first, second_last } - bubble(i) - return true - end - elseif second_first_last then - second_first_pn = second_first_last - if first_last_pn == second_first_pn then - -- 2-4, 5=9 -> 2-9 - pages[i-1] = { first_first, second_last } - bubble(i) - return true - end - elseif first_last_pn == second_first_pn then - -- 2-3, 3-4 -> 2-4 - pages[i-1] = { first_last, second_last } - bubble(i) - return true - end - end - return false - end - while doit() do end + collapsepages(pages) end - -- if #pages > 0 then -- or 0 d = dd for p=1,#pages do local first, last = pages[p][1], pages[p][2] if first == last then if first.references.lastrealpage then - pagerange(first,first,true) + pagerange(first,first,true,prefixspec,pagespec) else - pagenumber(first) + pagenumber(first,prefixspec,pagespec) end elseif last.references.lastrealpage then - pagerange(first,last,true) + pagerange(first,last,true,prefixspec,pagespec) else - pagerange(first,last,false) + pagerange(first,last,false,prefixspec,pagespec) end end + elseif entry.references.lastrealpage then + pagerange(entry,entry,true,prefixspec,pagespec) else - if entry.references.lastrealpage then - pagerange(entry,entry,true) - else - pagenumber(entry) - end + pagenumber(entry,prefixspec,pagespec) end else while true do if entry.references.lastrealpage then - pagerange(entry,entry,true) + pagerange(entry,entry,true,prefixspec,pagespec) else - pagenumber(entry) + pagenumber(entry,prefixspec,pagespec) end if d == #data then break @@ -733,13 +728,12 @@ function registers.flush(data,options,prefixspec,pagespec) end context.stopregisterpages() elseif kind == 'see' then - -- maybe some day more words + -- maybe some day more words, todo: metadata like normal entries context.startregisterseewords() - local seeindex = entry.references.seeindex or "" - local seetext = entry.seeword.text or "" - local proc = entry.processors and entry.processors[1] - -- todo: metadata like normal entries - context.registeroneword(proc or "",0,seeindex,seetext) + local seeindex = entry.references.seeindex or "" + local seetext = entry.seeword.text or "" + local processor = entry.processors and entry.processors[1] or "" + context.registeroneword(processor,0,seeindex,seetext) context.stopregisterseewords() end end diff --git a/tex/generic/context/luatex-fonts-merged.lua b/tex/generic/context/luatex-fonts-merged.lua index 83ca1c35c..bf2cd47fa 100644 --- a/tex/generic/context/luatex-fonts-merged.lua +++ b/tex/generic/context/luatex-fonts-merged.lua @@ -1,6 +1,6 @@ -- merged file : luatex-fonts-merged.lua -- parent file : luatex-fonts.lua --- merge date : 10/20/10 21:33:36 +-- merge date : 10/21/10 14:02:50 do -- begin closure to overcome local limits and interference @@ -766,15 +766,21 @@ end table.sortedkeys = sortedkeys table.sortedhashkeys = sortedhashkeys +local function nothing() end + local function sortedhash(t) - local s = sortedhashkeys(t) -- maybe just sortedkeys - local n = 0 - local function kv(s) - n = n + 1 - local k = s[n] - return k, t[k] + if t then + local s = sortedhashkeys(t) -- maybe just sortedkeys + local n = 0 + local function kv(s) + n = n + 1 + local k = s[n] + return k, t[k] + end + return kv, s + else + return nothing end - return kv, s end table.sortedhash = sortedhash |