beta 2010.08.10 17:14

author: Marius <mariausol@gmail.com> 2010-08-14 15:56:20 +0300
committer: Marius <mariausol@gmail.com> 2010-08-14 15:56:20 +0300
commit: b469b8ec1b494ab72cd462bfc539ce01440e6aaf (patch)
tree: 3a9c3fb8433c5f75020fef1d531bedb7c948f66c /scripts/context/lua/mtx-patterns.lua
parent: 39e30629c15ae4a899532d84c4abea127f2847a6 (diff)
download: context-b469b8ec1b494ab72cd462bfc539ce01440e6aaf.tar.gz
1 files changed, 319 insertions, 274 deletions
diff --git a/scripts/context/lua/mtx-patterns.lua b/scripts/context/lua/mtx-patterns.lua
index c3817e9a8..a51ca5860 100644
--- a/scripts/context/lua/mtx-patterns.lua
+++ b/scripts/context/lua/mtx-patterns.lua
@@ -6,95 +6,111 @@ if not modules then modules = { } end modules ['mtx-patterns'] = {
     license   = "see context related readme files"
 }
 
-local format, find, concat = string.format, string.find, table.concat
+local format, find, concat, gsub, match, gmatch = string.format, string.find, table.concat, string.gsub, string.match, string.gmatch
+local byte, char = utf.byte, utf.char
+local addsuffix = file.addsuffix
+local lpegmatch, validutf8 = lpeg.match, lpeg.patterns.validutf8
 
 scripts          = scripts          or { }
 scripts.patterns = scripts.patterns or { }
 
+
+local permitted_characters = table.tohash {
+    0x0009, -- tab
+    0x0027, -- apostrophe
+    0x02BC, -- modifier letter apostrophe (used in greek)
+    0x002D, -- hyphen
+    0x200C, -- zwnj (zero width non-joiner)
+    0x2019, -- right single quotation mark
+    0x1FBD, -- greek, but no letter: symbol modifier
+    0x1FBF, -- greek, but no letter: symbol modifier
+}
+
+local ignored_ancient_greek = table.tohash {
+    0x1FD3, -- greekiotadialytikatonos (also 0x0390)
+    0x1FE3, -- greekupsilondialytikatonos (also 0x03B0)
+    0x1FBD, -- greek, but no letter: symbol modifier
+    0x1FBF, -- greek, but no letter: symbol modifier
+    0x03F2, -- greeksigmalunate
+    0x02BC, -- modifier letter apostrophe
+}
+
+local ignored_french = table.tohash {
+    0x02BC, -- modifier letter apostrophe
+}
+
+local replaced_whatever =  {
+    [char(0x2019)] = char(0x0027)
+}
+
 scripts.patterns.list = {
-    -- no patterns for arabic
---  { "ar",  "hyph-ar.tex",            "arabic" },
-    -- not supported
---  { "as",  "hyph-as.tex",            "assamese" },
-    { "bg",  "hyph-bg.tex",            "bulgarian" },
-    -- not supported
---  { "bn",  "hyph-bn.tex",            "bengali" },
-    { "ca",  "hyph-ca.tex",            "catalan" },
-    -- not supported
---  { "cop", "hyph-cop.tex",           "coptic" },
-    { "cs",  "hyph-cs.tex",            "czech" },
-    { "cy",  "hyph-cy.tex",            "welsh" },
-    { "da",  "hyph-da.tex",            "danish" },
-    { "deo", "hyph-de-1901.tex",       "german, old spelling" },
-    { "de",  "hyph-de-1996.tex",       "german, new spelling" },
-    { "??",  "hyph-de-ch-1901.tex",    "swiss german" },
---~ { "??",  "hyph-el-monoton.tex",    "" },
---~ { "??",  "hyph-el-polyton.tex",    "" },
-    { "agr", "hyph-grc.tex",           "ancient greek" },
-    { "gb",  "hyph-en-gb.tex",         "british english" },
-    { "us",  "hyph-en-us.tex",	       "american english" },
---~ { "gr",  "",                       "" },
-    -- these patterns do not satisfy the rules of 'clean patterns'
---  { "eo",  "hyph-eo.tex",            "esperanto" },
-    { "es",  "hyph-es.tex",            "spanish" },
-    { "et",  "hyph-et.tex",            "estonian" },
-    { "eu",  "hyph-eu.tex",            "basque" },
-    -- no patterns for farsi/persian
---  { "fa",  "hyph-fa.tex",            "farsi" },
-    { "fi",  "hyph-fi.tex",            "finnish" },
-    { "fr",  "hyph-fr.tex",            "french" },
-    { "??",  "hyph-ga.tex",            "irish" },
-    { "??",  "hyph-gl.tex",            "galician" },
-    -- not supported
---  { "gu",  "hyph-gu.tex",            "gujarati" },
-    -- not supported
---  { "hi",  "hyph-hi.tex",            "hindi" },
-    { "hr",  "hyph-hr.tex",            "croatian" },
-    { "??",  "hyph-hsb.tex",           "upper sorbian" },
-    { "hu",  "hyph-hu.tex",            "hungarian" },
-    -- not supported
---  { "hy",  "hyph-hy.tex",            "armenian" },
-    { "??",  "hyph-ia.tex",            "interlingua" },
-    { "??",  "hyph-id.tex",            "indonesian" },
-    { "is",  "hyph-is.tex",            "icelandic" },
-    { "it",  "hyph-it.tex",            "italian" },
-    { "??",  "hyph-kmr.tex",           "kurmanji" },
-    -- not supported
---  { "kn",  "hyph-kn.tex",            "kannada" },
-    { "la",  "hyph-la.tex",            "latin" },
-    -- not supported
---  { "lo",  "hyph-lo.tex",            "lao" },
-    { "lt",  "hyph-lt.tex",            "lithuanian" },
-    { "??",  "hyph-lv.tex",            "latvian" },
-    { "mn",  "hyph-mn-cyrl.tex",       "mongolian, cyrillic script" },
-    { "nb",  "hyph-nb.tex",            "norwegian bokmål" },
-    { "nl",  "hyph-nl.tex",            "dutch" },
-    { "nn",  "hyph-nn.tex",            "norwegian nynorsk" },
-    -- not supported
---  { "or",  "hyph-or.tex",            "oriya" },
-    -- not supported
---  { "pa",  "hyph-pa.tex",            "panjabi" },
-    -- not supported
---  { "",  "hyph-.tex",            "" },
-    { "pl",  "hyph-pl.tex",            "polish" },
-    { "pt",  "hyph-pt.tex",            "portuguese" },
-    { "ro",  "hyph-ro.tex",            "romanian" },
-    { "ru",  "hyph-ru.tex",            "russian" },
-    -- not supported
---  { "sa",  "hyph-sa.tex",            "sanskrit" },
-    { "sk",  "hyph-sk.tex",            "slovak" },
-    { "sl",  "hyph-sl.tex",            "slovenian" },
-    -- TODO: there is both Cyrillic and Latin script available
-    { "sr",  "hyph-sr-cyrl.tex",       "serbian" },
-    { "sv",  "hyph-sv.tex",            "swedish" },
-    -- not supported
---  { "ta",  "hyph-ta.tex",            "tamil" },
-    -- not supported
---  { "te",  "hyph-te.tex",            "telugu" },
-    { "tk",  "hyph-tk.tex",            "turkmen" },
-    { "tr",  "hyph-tr.tex",            "turkish" },
-    { "uk",  "hyph-uk.tex",            "ukrainian" },
-    { "zh",  "hyph-zh-latn.tex",       "zh-latn, chinese Pinyin" },
+ -- { "ar",  "hyph-ar",            "arabic" },
+ -- { "as",  "hyph-as",            "assamese" },
+    { "bg",  "hyph-bg",            "bulgarian" },
+ -- { "bn",  "hyph-bn",            "bengali" },
+    { "ca",  "hyph-ca",            "catalan" },
+ -- { "??",  "hyph-cop",           "coptic" },
+    { "cs",  "hyph-cs",            "czech" },
+    { "cy",  "hyph-cy",            "welsh" },
+    { "da",  "hyph-da",            "danish" },
+    { "deo", "hyph-de-1901",       "german, old spelling" },
+    { "de",  "hyph-de-1996",       "german, new spelling" },
+ -- { "??",  "hyph-de-ch-1901",    "swiss german" },
+ -- { "??",  "hyph-el-monoton",    "greek" },
+ -- { "gr",  "hyph-el-polyton",    "greek" },
+    { "agr", "hyph-grc",           "ancient greek", ignored_ancient_greek },
+    { "gb",  "hyph-en-gb",         "british english" },
+    { "us",  "hyph-en-us",	       "american english" },
+ -- { "eo",  "hyph-eo",            "esperanto" },
+    { "es",  "hyph-es",            "spanish" },
+    { "et",  "hyph-et",            "estonian" },
+    { "eu",  "hyph-eu",            "basque" },
+ -- { "fa",  "hyph-fa",            "farsi" },
+    { "fi",  "hyph-fi",            "finnish" },
+    { "fr",  "hyph-fr",            "french", ignored_french },
+ -- { "??",  "hyph-ga",            "irish" },
+ -- { "??",  "hyph-gl",            "galician" },
+ -- { "gu",  "hyph-gu",            "gujarati" },
+ -- { "hi",  "hyph-hi",            "hindi" },
+    { "hr",  "hyph-hr",            "croatian" },
+ -- { "??",  "hyph-hsb",           "upper sorbian" },
+    { "hu",  "hyph-hu",            "hungarian" },
+ -- { "hy",  "hyph-hy",            "armenian" },
+ -- { "??",  "hyph-ia",            "interlingua" },
+ -- { "??",  "hyph-id",            "indonesian" },
+    { "is",  "hyph-is",            "icelandic" },
+    { "it",  "hyph-it",            "italian" },
+ -- { "??",  "hyph-kmr",           "kurmanji" },
+ -- { "kn",  "hyph-kn",            "kannada" },
+    { "la",  "hyph-la",            "latin" },
+ -- { "lo",  "hyph-lo",            "lao" },
+    { "lt",  "hyph-lt",            "lithuanian" },
+    { "lv",  "hyph-lv",            "latvian" },
+ -- { "ml",  "hyph-ml",            "..." },
+    { "mn",  "hyph-mn-cyrl",       "mongolian, cyrillic script" },
+ -- { "mr",  "hyph-mr",            "..." },
+    { "nb",  "hyph-nb",            "norwegian bokmål" },
+    { "nl",  "hyph-nl",            "dutch" },
+    { "nn",  "hyph-nn",            "norwegian nynorsk" },
+ -- { "or",  "hyph-or",            "oriya" },
+ -- { "pa",  "hyph-pa",            "panjabi" },
+ -- { "",    "hyph-",              "" },
+    { "pl",  "hyph-pl",            "polish" },
+    { "pt",  "hyph-pt",            "portuguese" },
+    { "ro",  "hyph-ro",            "romanian" },
+    { "ru",  "hyph-ru",            "russian" },
+ -- { "sa",  "hyph-sa",            "sanskrit" },
+    { "sk",  "hyph-sk",            "slovak" },
+    { "sl",  "hyph-sl",            "slovenian" },
+    { "sr",  "hyph-sr-cyrl",       "serbian" },
+ -- { "sr",  "hyph-sr-latn",       "serbian" },
+    { "sv",  "hyph-sv",            "swedish" },
+ -- { "ta",  "hyph-ta",            "tamil" },
+ -- { "te",  "hyph-te",            "telugu" },
+    { "tk",  "hyph-tk",            "turkmen" },
+    { "tr",  "hyph-tr",            "turkish" },
+    { "uk",  "hyph-uk",            "ukrainian" },
+    { "zh",  "hyph-zh-latn",       "zh-latn, chinese pinyin" },
 }
 
 -- stripped down from lpeg example:
@@ -105,189 +121,220 @@ function utf.check(str)
     return lpeg.match(lpeg.patterns.validutf8,str)
 end
 
-local permitted_commands = table.tohash {
-    "message",
-    "endinput"
-}
-
-local permitted_characters = table.tohash {
-    0x0009, -- tab
-    0x0027, -- apostrofe
-    0x002D, -- hyphen
-    0x200C, --
-}
+-- *.tex
+-- *.hyp.txt *.pat.txt *.lic.txt *.chr.txt
 
-function scripts.patterns.load(path,name,mnemonic,fullcheck)
+function scripts.patterns.load(path,name,mnemonic,ignored)
     local fullname = file.join(path,name)
-    local data = io.loaddata(fullname) or ""
-    local byte, char = utf.byte, utf.char
-    if data ~= "" then
-        data = data:gsub("([\n\r])\\input ([^ \n\r]+)", function(previous,subname)
-            local subname = file.addsuffix(subname,"tex")
-            local subfull = file.join(file.dirname(fullname),subname)
-            local subdata = io.loaddata(subfull) or ""
-            if subdata == "" then
-                if mnemonic then
-                    logs.simple("no subfile %s for language %s",subname,mnemonic)
-                else
-                    logs.simple("no subfile %s",name)
+    local texfile = addsuffix(fullname,"tex")
+    local hypfile = addsuffix(fullname,"hyp.txt")
+    local patfile = addsuffix(fullname,"pat.txt")
+    local licfile = addsuffix(fullname,"lic.txt")
+ -- local chrfile = addsuffix(fullname,"chr.txt")
+    local okay = true
+    local hyphenations, patterns, comment, stripset = "", "", "", ""
+    local splitpatternsnew, splithyphenationsnew = { }, { }
+    local splitpatternsold, splithyphenationsold = { }, { }
+    local usedpatterncharacters, usedhyphenationcharacters = { }, { }
+    if lfs.isfile(patfile) then
+        logs.simple("using txt files %s.[hyp|pat|lic].txt",name)
+        comment, patterns, hyphenations = io.loaddata(licfile) or "", io.loaddata(patfile) or "", io.loaddata(hypfile) or ""
+        hypfile, patfile, licfile = hypfile, patfile, licfile
+    elseif lfs.isfile(texfile) then
+        logs.simple("using tex file %s.txt",name)
+        local data = io.loaddata(texfile) or ""
+        if data ~= "" then
+            data = gsub(data,"([\n\r])\\input ([^ \n\r]+)", function(previous,subname)
+                local subname = addsuffix(subname,"tex")
+                local subfull = file.join(file.dirname(texfile),subname)
+                local subdata = io.loaddata(subfull) or ""
+                if subdata == "" then
+                    logs.simple("no subfile %s",subname)
                 end
-            end
-            return previous .. subdata
-        end)
-        local comment = data:match("^(.-)[\n\r]\\patterns") or ""
-        local n, okay = 0, true
-        local cd = characters.data
-        for line in data:gmatch("[^ \n\r]+") do
-            local ok = utf.check(line)
-            n = n + 1
-            if not ok then
-                okay = false
-                line = line:gsub("%%","%%%%")
-                if fullcheck then
-                    if mnemonic then
-                        logs.simple("invalid utf in language %s, file %s, line %s: %s",mnemonic,name,n,line)
-                    else
-                        logs.simple("invalid utf in file %s, line %s: %s",name,n,line)
-                    end
-                else
-                    if mnemonic then
-                        logs.simple("file %s for %s contains invalid utf",name,mnemonic)
-                    else
-                        logs.simple("file %s contains invalid utf",name)
+                return previous .. subdata
+            end)
+            data = gsub(data,"%%.-[\n\r]","")
+            data = gsub(data," *[\n\r]+","\n")
+            patterns = match(data,"\\patterns[%s]*{[%s]*(.-)[%s]*}") or ""
+            hyphenations = match(data,"\\hyphenation[%s]*{[%s]*(.-)[%s]*}") or ""
+            comment = match(data,"^(.-)[\n\r]\\patterns") or ""
+        else
+            okay = false
+        end
+    else
+        okay = false
+    end
+    if okay then
+        -- split on whitespace into individual entries
+        local how = lpeg.patterns.whitespace^1
+        splitpatternsnew = lpeg.split(how,patterns)
+        splithyphenationsnew = lpeg.split(how,hyphenations)
+    end
+    if okay then
+        -- remove comments
+        local function check(data,splitdata,name)
+            if find(data,"%%") then
+                for i=1,#splitdata do
+                    local line = splitdata[i]
+                    if find(line,"%%") then
+                        splitdata[i] = gsub(line,"%%.*$","")
+                        logs.simple("removing comment: %s",line)
                     end
-                    break
                 end
             end
         end
-        local c, h = { }, { }
-        for line in data:gmatch("[^\n\r]+") do
-            local txt, cmt = line:match("^(.-)%%(.*)$")
-            if not txt then
-                txt, cmt = line, ""
-            end
-            for s in txt:gmatch("\\([a-zA-Z]+)") do
-                h[s] = (h[s] or 0) + 1
-            end
-            for s in cmt:gmatch("\\([a-zA-Z]+)") do
-                c[s] = (c[s] or 0) + 1
-            end
-        end
-        h.patterns = nil
-        h.hyphenation = nil
-        for k, v in next, h do
-            if not permitted_commands[k] then okay = false end
-            if mnemonic then
-                logs.simple("command \\%s found in language %s, file %s, n=%s",k,mnemonic,name,v)
-            else
-                logs.simple("command \\%s found in file %s, n=%s",k,name,v)
+        check(patterns,splitpatternsnew,patfile)
+        check(hyphenations,splithyphenationsnew,hypfile)
+    end
+    if okay then
+        -- remove lines with commands
+        local function check(data,splitdata,name)
+            if find(data,"\\") then
+                for i=1,#splitdata do
+                    local line = splitdata[i]
+                    if find(line,"\\") then
+                        splitdata[i] = ""
+                        logs.simple("removing line with command: %s",line)
+                    end
+                end
             end
         end
-        if not environment.argument("fast") then
-            for k, v in next, c do
-                if mnemonic then
-                    logs.simple("command \\%s found in comment of language %s, file %s, n=%s",k,mnemonic,name,v)
-                else
-                    logs.simple("command \\%s found in comment of file %s, n=%s",k,name,v)
+        check(patterns,splitpatternsnew,patfile)
+        check(hyphenations,splithyphenationsnew,hypfile)
+    end
+    if okay then
+        -- check for valid utf
+        local function check(data,splitdata,name)
+            for i=1,#splitdata do
+                local line = splitdata[i]
+                local ok = lpegmatch(validutf8,line)
+                if not ok then
+                    splitdata[i] = ""
+                    logs.simple("removing line with invalid utf: %s",line)
                 end
             end
+            -- note: commands used in comments are not checked here
         end
-        data = data:gsub("%%.-[\n\r]","")
-        data = data:gsub(" *[\n\r]+","\n")
-        local patterns = data:match("\\patterns[%s]*{[%s]*(.-)[%s]*}") or ""
-        local hyphenations = data:match("\\hyphenation[%s]*{[%s]*(.-)[%s]*}") or ""
-        patterns = patterns:gsub("[ \t]+","\n")
-        hyphenations = hyphenations:gsub("[ \t]+","\n")
-        local p, h = { }, { }
-        local pats, hyps = { } , { }
-        local pused, hused = { } , { }
-        local period = byte(".")
-        for line in patterns:gmatch("[^ \n\r]+") do
-            local ok = true
-            for b in line:utfvalues() do
-                if b == period then
-                    -- ok
-                else
-                    local ct = cd[b].category
-                    if ct == "lu" or ct == "ll" then
-                        pused[char(b)] = true
-                    elseif ct == "nd" then
-                        -- ok
+        check(patterns,splitpatternsnew,patfile)
+        check(hyphenations,splithyphenationsnew,hypfile)
+    end
+    if okay then
+        -- remove lines containing a character of an unexpected category (not permitted, not a letter, not a digit)
+        local cd = characters.data
+        local stripped = { }
+        local function check(splitdata,special,name)
+            local used = { }
+            for i=1,#splitdata do
+                local line = splitdata[i]
+                for b in line:utfvalues() do -- could be an lpeg
+                    if b == special then
+                        -- not registered
+                    elseif permitted_characters[b] then
+                        used[char(b)] = true
                     else
-                        p[b] = (p[b] or 0) + 1
-                        ok = false
+                        local cdb = cd[b]
+                        if not cdb then
+                            logs.simple("no entry in chardata for character %s (0x%04X)",char(b),b)
+                        else
+                            local ct = cd[b].category
+                            if ct == "lu" or ct == "ll" then
+                                used[char(b)] = true
+                            elseif ct == "nd" then
+                                -- number
+                            else
+                                logs.simple("removing line with suspected utf character %s (0x%04X), category %s: %s",char(b),b,ct,line)
+                                splitdata[i] = ""
+                                break
+                            end
+                        end
                     end
                 end
             end
-            if ok then
-                pats[#pats+1] = line
-            end
+            return  used
         end
-        local hyphen = byte("-")
-        for line in hyphenations:gmatch("[^ \n\r]+") do
-            local ok = true
-            for b in line:utfvalues() do
-                if b == hyphen then
-                    -- ok
-                else
-                    local ct = cd[b].category
-                    if ct == "lu" or ct == "ll" then
-                        hused[char(b)] = true
+        usedpatterncharacters = check(splitpatternsnew,byte("."))
+        usedhyphenationcharacters = check(splithyphenationsnew,byte("-"))
+        for k, v in next, stripped do
+            logs.simple("entries that contain character %s (0x%04X) have been omitted",char(k),k)
+        end
+    end
+    if okay then
+        local function stripped(what,ignored)
+            -- ignored (per language)
+            local p = nil
+            if ignored then
+                for k, v in next, ignored do
+                    if p then
+                        p = p + lpeg.P(char(k))
                     else
-                        h[b] = (h[b] or 0) + 1
-                        ok = false
+                        p = lpeg.P(char(k))
                     end
                 end
+                p = lpeg.P{ p + 1 * lpeg.V(1) } -- anywhere
             end
-            if ok then
-                hyps[#hyps+1] = line
-            end
-        end
-        local stripped = { }
-        for k, v in next, p do
-            if mnemonic then
-                logs.simple("invalid character %s (0x%04X) in patterns of language %s, file %s, n=%s",char(k),k,mnemonic,name,v)
-            else
-                logs.simple("invalid character %s (0x%04X) in patterns of file %s, n=%s",char(k),k,name,v)
+            -- replaced (all languages)
+            local r = nil
+            for k, v in next, replaced_whatever do
+                if r then
+                    r = r + lpeg.P(k)/v
+                else
+                    r = lpeg.P(k)/v
+                end
             end
-            if not permitted_characters[k] then
-                okay = false
-            else
-                stripped[k] = true
+            r = lpeg.Cs((r + 1)^0)
+            local result = { }
+            for i=1,#what do
+                local line = what[i]
+                if p and lpegmatch(p,line) then
+                    logs.simple("discarding conflicting pattern: %s",line)
+                else -- we can speed this up by testing for replacements in the string
+                    local l = lpegmatch(r,line)
+                    if l ~= line then
+                        logs.simple("sanitizing pattern: %s -> %s (for old patterns)",line,l)
+                    end
+                    result[#result+1] = l
+                end
             end
+            return result
         end
-        for k, v in next, h do
-            if mnemonic then
-                logs.simple("invalid character %s (0x%04X) in exceptions of language %s, file %s, n=%s",char(k),k,mnemonic,name,v)
-            else
-                logs.simple("invalid character %s (0x%04X) in exceptions of file %s, n=%s",char(k),k,name,v)
-            end
-            if not permitted_characters[k] then
-                okay = false
-            else
-                stripped[k] = true
+
+        splitpatternsold = stripped(splitpatternsnew,ignored)
+        splithyphenationsold = stripped(splithyphenationsnew,ignored)
+
+    end
+    if okay then
+        -- discard empty entries and duplicates
+        local function check(data,splitdata,name)
+            local used, collected = { }, { }
+            for i=1,#splitdata do
+                local line = splitdata[i]
+                if line == "" then
+                    -- discard
+                elseif used[line] then
+                    -- discard
+                    logs.simple("discarding duplicate pattern: %s",line)
+                else
+                    used[line] = true
+                    collected[#collected+1] = line
+                end
             end
+            return collected
         end
-        local stripset = ""
-        for k, v in next, stripped do
-            logs.simple("entries that contain character %s will be omitted",char(k))
-            stripset = stripset .. "%" .. char(k)
-        end
-        return okay, pats, hyps, comment, stripset, pused, hused
-    else
-        if mnemonic then
-            logs.simple("no file %s for language %s",fullname,mnemonic)
-        else
-            logs.simple("no file %s",fullname)
-        end
-        return false, { }, { }, "", "", { }, { }
+        splitpatternsnew = check(patterns,splitpatternsnew,patfile)
+        splithyphenationsnew = check(hyphenations,splithyphenationsnew,hypfile)
+        splitpatternsold = check(patterns,splitpatternsold,patfile)
+        splithyphenationsold = check(hyphenations,splithyphenationsold,hypfile)
+    end
+    if not okay then
+        logs.simple("no valid file %s.*",name)
     end
+    return okay, splitpatternsnew, splithyphenationsnew, splitpatternsold, splithyphenationsold, comment, stripset, usedpatterncharacters, usedhyphenationcharacters
 end
 
-function scripts.patterns.save(destination,mnemonic,name,patterns,hyphenations,comment,stripped,pused,hused)
-    local nofpatterns = #patterns
-    local nofhyphenations = #hyphenations
-    logs.simple("language %s has %s patterns and %s exceptions",mnemonic,nofpatterns,nofhyphenations)
+function scripts.patterns.save(destination,mnemonic,name,patternsnew,hyphenationsnew,patternsold,hyphenationsold,comment,stripped,pused,hused,ignored)
+    local nofpatternsnew, nofhyphenationsnew = #patternsnew, #hyphenationsnew
+    local nofpatternsold, nofhyphenationsold = #patternsold, #hyphenationsold
+    logs.simple("language %s has %s old and %s new patterns and %s old and %s new exceptions",mnemonic,nofpatternsold,nofpatternsnew,nofhyphenationsold,nofhyphenationsnew)
     if mnemonic ~= "??" then
         local pu = concat(table.sortedkeys(pused), " ")
         local hu = concat(table.sortedkeys(hused), " ")
@@ -318,28 +365,28 @@ function scripts.patterns.save(destination,mnemonic,name,patterns,hyphenations,c
         }
 
         local patterndata, hyphenationdata
-        if nofpatterns > 0 then
+        if nofpatternsnew > 0 then
             patterndata = {
-                n            = nofpatterns,
-                data         = concat(patterns," ") or nil,
+                n            = nofpatternsnew,
+                data         = concat(patternsnew," ") or nil,
                 characters   = concat(table.sortedkeys(pused),""),
                 minhyphenmin = 1, -- determined by pattern author
                 minhyphenmax = 1, -- determined by pattern author
             }
         else
             patterndata = {
-                n = nofpatterns,
+                n = 0,
             }
         end
-        if nofhyphenations > 0 then
+        if nofhyphenationsnew > 0 then
             hyphenationdata = {
-                n          = nofhyphenations,
-                data       = concat(hyphenations," "),
+                n          = nofhyphenationsnew,
+                data       = concat(hyphenationsnew," "),
                 characters = concat(table.sortedkeys(hused),""),
             }
         else
             hyphenationdata = {
-                n = nofhyphenations,
+                n = 0,
             }
         end
         local data = {
@@ -358,8 +405,8 @@ function scripts.patterns.save(destination,mnemonic,name,patterns,hyphenations,c
         os.remove(luafile)
 
         io.savedata(rmefile,format("%s\n\n%s",topline,comment))
-        io.savedata(patfile,format("%s\n\n%s\n\n%% used: %s\n\n\\patterns{\n%s}",topline,banner,pu,concat(patterns,"\n")))
-        io.savedata(hypfile,format("%s\n\n%s\n\n%% used: %s\n\n\\hyphenation{\n%s}",topline,banner,hu,concat(hyphenations,"\n")))
+        io.savedata(patfile,format("%s\n\n%s\n\n%% used: %s\n\n\\patterns{\n%s}",topline,banner,pu,concat(patternsold,"\n")))
+        io.savedata(hypfile,format("%s\n\n%s\n\n%% used: %s\n\n\\hyphenation{\n%s}",topline,banner,hu,concat(hyphenationsold,"\n")))
         io.savedata(luafile,table.serialize(data,true))
     end
 end
@@ -370,22 +417,16 @@ end
 
 function scripts.patterns.check()
     local path = environment.argument("path") or "."
-    local found = false
     local files = environment.files
+    local only  = false
     if #files > 0 then
-        for i=1,#files do
-            local name = files[i]
-            logs.simple("checking language file %s", name)
-            local okay = scripts.patterns.load(path,name,nil,not environment.argument("fast"))
-            if #environment.files > 1 then
-                logs.simple("")
-            end
-        end
-    else
-        for k, v in next, scripts.patterns.list do
-            local mnemonic, name = v[1], v[2]
+        only = table.tohash(files)
+    end
+    for k, v in next, scripts.patterns.list do
+        local mnemonic, name, ignored = v[1], v[2], v[4]
+        if not only or only[mnemonic] then
             logs.simple("checking language %s, file %s", mnemonic, name)
-            local okay = scripts.patterns.load(path,name,mnemonic,not environment.argument("fast"))
+            local okay = scripts.patterns.load(path,name,mnemonic,ignored)
             if not okay then
                 logs.simple("there are errors that need to be fixed")
             end
@@ -403,16 +444,23 @@ function scripts.patterns.convert()
         if path == destination then
             logs.simple("source path and destination path should differ (use --path and/or --destination)")
         else
+            local files = environment.files
+            local only  = false
+            if #files > 0 then
+                only = table.tohash(files)
+            end
             for k, v in next, scripts.patterns.list do
-                local mnemonic, name = v[1], v[2]
-                logs.simple("converting language %s, file %s", mnemonic, name)
-                local okay, patterns, hyphenations, comment, stripped, pused, hused = scripts.patterns.load(path,name,false)
-                if okay then
-                    scripts.patterns.save(destination,mnemonic,name,patterns,hyphenations,comment,stripped,pused,hused)
-                else
-                    logs.simple("convertion aborted due to error(s)")
+                local mnemonic, name, ignored = v[1], v[2], v[4]
+                if not only or only[mnemonic] then
+                    logs.simple("converting language %s, file %s", mnemonic, name)
+                    local okay, patternsnew, hyphenationsnew, patternsold, hyphenationsold, comment, stripped, pused, hused = scripts.patterns.load(path,name,mnemonic,ignored)
+                    if okay then
+                        scripts.patterns.save(destination,mnemonic,name,patternsnew,hyphenationsnew,patternsold,hyphenationsold,comment,stripped,pused,hused,ignored)
+                    else
+                        logs.simple("convertion aborted due to error(s)")
+                    end
+                    logs.simple("")
                 end
-                logs.simple("")
             end
         end
     end
@@ -425,8 +473,6 @@ messages.help = [[
 --check               check pattern file (or those used by context when no file given)
 --path                source path where hyph-foo.tex files are stored
 --destination         destination path
-
---fast                only report filenames, no lines
 ]]
 
 if environment.argument("check") then
@@ -441,6 +487,5 @@ end
 
 -- mtxrun --script pattern --check hyph-*.tex
 -- mtxrun --script pattern --check          --path=c:/data/develop/svn-hyphen/trunk/hyph-utf8/tex/generic/hyph-utf8/patterns
--- mtxrun --script pattern --check   --fast --path=c:/data/develop/svn-hyphen/trunk/hyph-utf8/tex/generic/hyph-utf8/patterns
--- mtxrun --script pattern --convert        --path=c:/data/develop/svn-hyphen/trunk/hyph-utf8/tex/generic/hyph-utf8/patterns --destination=e:/tmp/patterns
--- mtxrun --script pattern --convert        --path=c:/data/develop/svn-hyphen/branches/luatex/hyph-utf8/tex/generic/hyph-utf8/patterns/tex --destination=e:/tmp/patterns
+-- mtxrun --script pattern --convert        --path=c:/data/develop/svn-hyphen/trunk/hyph-utf8/tex/generic/hyph-utf8/patterns/tex --destination=e:/tmp/patterns
+-- mtxrun --script pattern --convert        --path=c:/data/develop/svn-hyphen/trunk/hyph-utf8/tex/generic/hyph-utf8/patterns/txt --destination=e:/tmp/patterns
author	Marius <mariausol@gmail.com>	2010-08-14 15:56:20 +0300
committer	Marius <mariausol@gmail.com>	2010-08-14 15:56:20 +0300
commit	b469b8ec1b494ab72cd462bfc539ce01440e6aaf (patch)
tree	3a9c3fb8433c5f75020fef1d531bedb7c948f66c /scripts/context/lua/mtx-patterns.lua
parent	39e30629c15ae4a899532d84c4abea127f2847a6 (diff)
download	context-b469b8ec1b494ab72cd462bfc539ce01440e6aaf.tar.gz