diff options
author | Hans Hagen <pragma@wxs.nl> | 2010-10-29 11:35:00 +0200 |
---|---|---|
committer | Hans Hagen <pragma@wxs.nl> | 2010-10-29 11:35:00 +0200 |
commit | 2e3932acbaa9da37885abbbd1f449a110c06f9a1 (patch) | |
tree | 3a4b1c5a08129cd5881cd8f8758b0ebd2fb4f80d /tex/context/base/lang-wrd.lua | |
parent | a1bbb1605334a1209f43625c174ba50ad698235e (diff) | |
download | context-2e3932acbaa9da37885abbbd1f449a110c06f9a1.tar.gz |
beta 2010.10.29 11:35
Diffstat (limited to 'tex/context/base/lang-wrd.lua')
-rw-r--r-- | tex/context/base/lang-wrd.lua | 124 |
1 files changed, 100 insertions, 24 deletions
diff --git a/tex/context/base/lang-wrd.lua b/tex/context/base/lang-wrd.lua index 7f7099c6d..d008ed341 100644 --- a/tex/context/base/lang-wrd.lua +++ b/tex/context/base/lang-wrd.lua @@ -8,6 +8,7 @@ if not modules then modules = { } end modules ['lang-ini'] = { local utf = unicode.utf8 local lower, utfchar = string.lower, utf.char +local concat = table.concat local lpegmatch = lpeg.match local P, S, Cs = lpeg.P, lpeg.S, lpeg.Cs @@ -59,6 +60,7 @@ local loaded = { } -- we share lists function words.load(tag,filename) local fullname = resolvers.findfile(filename,'other text file') or "" if fullname ~= "" then + report_languages("loading word file '%s'",fullname) statistics.starttiming(languages) local list = loaded[fullname] if not list then @@ -78,9 +80,13 @@ function words.found(id, str) local tag = languages.numbers[id] if tag then local data = wordsdata[tag] - return data and (data[str] or data[lower(str)]) - else - return false + if data then + if data[str] then + return 1 + elseif data[lower(str)] then + return 2 + end + end end end @@ -89,20 +95,86 @@ end -- there is an n=1 problem somewhere in nested boxes +--~ local function mark_words(head,whenfound) -- can be optimized and shared +--~ local current, start, str, language, n, done = head, nil, "", nil, 0, false +--~ local function action() +--~ if #str > 0 then +--~ local f = whenfound(language,str) +--~ if f then +--~ done = true +--~ for i=1,n do +--~ f(start) +--~ start = start.next +--~ end +--~ end +--~ end +--~ str, start, n = "", nil, 0 +--~ end +--~ while current do +--~ local id = current.id +--~ if id == glyph_code then +--~ local a = current.lang +--~ if a then +--~ if a ~= language then +--~ if start then +--~ action() +--~ end +--~ language = a +--~ end +--~ elseif start then +--~ action() +--~ language = a +--~ end +--~ local components = current.components +--~ if components then +--~ start = start or current +--~ n = n + 1 +--~ for g in traverse_nodes(components) do +--~ str = str .. utfchar(g.char) +--~ end +--~ else +--~ local code = current.char +--~ local data = chardata[code] +--~ if is_letter[data.category] then +--~ start = start or current +--~ n = n + 1 +--~ str = str .. utfchar(code) -- slow, maybe str should be a table (and given max) +--~ elseif start then +--~ action() +--~ end +--~ end +--~ elseif id == disc_code then +--~ if n > 0 then +--~ n = n + 1 +--~ end +--~ elseif id == kern_code and current.subtype == kerning_code and start then +--~ -- ok +--~ elseif start then +--~ action() +--~ end +--~ current = current.next +--~ end +--~ if start then +--~ action() +--~ end +--~ return head, done +--~ end + local function mark_words(head,whenfound) -- can be optimized and shared - local current, start, str, language, n, done = head, nil, "", nil, 0, false + local current, language, done = head, nil, nil, 0, false + local str, s, nds, n = { }, 0, { }, 0 -- n could also be a table, saves calls local function action() - if #str > 0 then - local f = whenfound(language,str) - if f then + if s > 0 then + local word = concat(str,"",1,s) + local mark = whenfound(language,word) + if mark then done = true for i=1,n do - f(start) - start = start.next + mark(nds[i]) end end end - str, start, n = "", nil, 0 + n, s = 0, 0 end while current do local id = current.id @@ -110,46 +182,48 @@ local function mark_words(head,whenfound) -- can be optimized and shared local a = current.lang if a then if a ~= language then - if start then + if s > 0 then action() end language = a end - elseif start then + elseif s > 0 then action() language = a end local components = current.components if components then - start = start or current n = n + 1 + nds[n] = current for g in traverse_nodes(components) do - str = str .. utfchar(g.char) + s = s + 1 + str[s] = utfchar(g.char) end else local code = current.char local data = chardata[code] - if data.uccode or data.lccode then ---~ if is_letter[code] then -- why does this fail - start = start or current + if is_letter[data.category] then n = n + 1 - str = str .. utfchar(code) -- slow, maybe str should be a table - elseif start then + nds[n] = current + s = s + 1 + str[s] = utfchar(code) + elseif s > 0 then action() end end - elseif id == disc_code then + elseif id == disc_code then -- take the replace if n > 0 then n = n + 1 + nds[n] = current end - elseif id == kern_code and current.subtype == kerning_code and start then + elseif id == kern_code and current.subtype == kerning_code and s > 0 then -- ok - elseif start then + elseif s > 0 then action() end current = current.next end - if start then + if s > 0 then action() end return head, done @@ -192,7 +266,9 @@ local cache = { } -- can also be done with method 1 -- frozen colors once used setmetatable(cache, { __index = function(t,k) -- k == language, numbers[k] == tag local c - if k < 0 then + if type(k) == "string" then + c = colist[k] + elseif k < 0 then c = colist["word:unset"] else c = colist["word:" .. (numbers[k] or "unset")] or colist["word:unknown"] |