summary refs log tree commit diff
path: root/scripts/context/lua/mtx-patterns.lua
diff options
context:
space:
mode:
author Marius <mariausol@gmail.com> 2010-08-14 15:56:20 +0300
committer Marius <mariausol@gmail.com> 2010-08-14 15:56:20 +0300
commit b469b8ec1b494ab72cd462bfc539ce01440e6aaf (patch)
tree 3a9c3fb8433c5f75020fef1d531bedb7c948f66c /scripts/context/lua/mtx-patterns.lua
parent 39e30629c15ae4a899532d84c4abea127f2847a6 (diff)
download context-b469b8ec1b494ab72cd462bfc539ce01440e6aaf.tar.gz
beta 2010.08.10 17:14
Diffstat (limited to 'scripts/context/lua/mtx-patterns.lua')
-rw-r--r-- scripts/context/lua/mtx-patterns.lua 593
1 files changed, 319 insertions, 274 deletions
diff --git a/scripts/context/lua/mtx-patterns.lua b/scripts/context/lua/mtx-patterns.lua
index c3817e9a8..a51ca5860 100644
--- a/scripts/context/lua/mtx-patterns.lua
+++ b/scripts/context/lua/mtx-patterns.lua
@@ -6,95 +6,111 @@ if not modules then modules = { } end modules ['mtx-patterns'] = {
license = "see context related readme files"
}
-local format, find, concat = string.format, string.find, table.concat
+local format, find, concat, gsub, match, gmatch = string.format, string.find, table.concat, string.gsub, string.match, string.gmatch
+local byte, char = utf.byte, utf.char
+local addsuffix = file.addsuffix
+local lpegmatch, validutf8 = lpeg.match, lpeg.patterns.validutf8
scripts = scripts or { }
scripts.patterns = scripts.patterns or { }
+
+local permitted_characters = table.tohash {
+ 0x0009, -- tab
+ 0x0027, -- apostrophe
+ 0x02BC, -- modifier apostrophe (used in greek)
+ 0x002D, -- hyphen
+ 0x200C, -- zwnj
+ 0x2019, -- quote right
+ 0x1FBD, -- greek, but no letter: symbol modifier
+ 0x1FBF, -- greek, but no letter: symbol modifier
+}
+
+local ignored_ancient_greek = table.tohash {
+ 0x1FD3, -- greekiotadialytikatonos (also 0x0390)
+ 0x1FE3, -- greekupsilondialytikatonos (also 0x03B0)
+ 0x1FBD, -- greek, but no letter: symbol modifier
+ 0x1FBF, -- greek, but no letter: symbol modifier
+ 0x03F2, -- greeksigmalunate
+ 0x02BC, -- modifier apostrophe
+}
+
+local ignored_french = table.tohash {
+ 0x02BC, -- modifier apostrophe
+}
+
+local replaced_whatever = {
+ [char(0x2019)] = char(0x0027)
+}
+
scripts.patterns.list = {
- -- no patterns for arabic
--- { "ar", "hyph-ar.tex", "arabic" },
- -- not supported
--- { "as", "hyph-as.tex", "assamese" },
- { "bg", "hyph-bg.tex", "bulgarian" },
- -- not supported
--- { "bn", "hyph-bn.tex", "bengali" },
- { "ca", "hyph-ca.tex", "catalan" },
- -- not supported
--- { "cop", "hyph-cop.tex", "coptic" },
- { "cs", "hyph-cs.tex", "czech" },
- { "cy", "hyph-cy.tex", "welsh" },
- { "da", "hyph-da.tex", "danish" },
- { "deo", "hyph-de-1901.tex", "german, old spelling" },
- { "de", "hyph-de-1996.tex", "german, new spelling" },
- { "??", "hyph-de-ch-1901.tex", "swiss german" },
---~ { "??", "hyph-el-monoton.tex", "" },
---~ { "??", "hyph-el-polyton.tex", "" },
- { "agr", "hyph-grc.tex", "ancient greek" },
- { "gb", "hyph-en-gb.tex", "british english" },
- { "us", "hyph-en-us.tex", "american english" },
---~ { "gr", "", "" },
- -- these patterns do not satisfy the rules of 'clean patterns'
--- { "eo", "hyph-eo.tex", "esperanto" },
- { "es", "hyph-es.tex", "spanish" },
- { "et", "hyph-et.tex", "estonian" },
- { "eu", "hyph-eu.tex", "basque" },
- -- no patterns for farsi/persian
--- { "fa", "hyph-fa.tex", "farsi" },
- { "fi", "hyph-fi.tex", "finnish" },
- { "fr", "hyph-fr.tex", "french" },
- { "??", "hyph-ga.tex", "irish" },
- { "??", "hyph-gl.tex", "galician" },
- -- not supported
--- { "gu", "hyph-gu.tex", "gujarati" },
- -- not supported
--- { "hi", "hyph-hi.tex", "hindi" },
- { "hr", "hyph-hr.tex", "croatian" },
- { "??", "hyph-hsb.tex", "upper sorbian" },
- { "hu", "hyph-hu.tex", "hungarian" },
- -- not supported
--- { "hy", "hyph-hy.tex", "armenian" },
- { "??", "hyph-ia.tex", "interlingua" },
- { "??", "hyph-id.tex", "indonesian" },
- { "is", "hyph-is.tex", "icelandic" },
- { "it", "hyph-it.tex", "italian" },
- { "??", "hyph-kmr.tex", "kurmanji" },
- -- not supported
--- { "kn", "hyph-kn.tex", "kannada" },
- { "la", "hyph-la.tex", "latin" },
- -- not supported
--- { "lo", "hyph-lo.tex", "lao" },
- { "lt", "hyph-lt.tex", "lithuanian" },
- { "??", "hyph-lv.tex", "latvian" },
- { "mn", "hyph-mn-cyrl.tex", "mongolian, cyrillic script" },
- { "nb", "hyph-nb.tex", "norwegian bokmål" },
- { "nl", "hyph-nl.tex", "dutch" },
- { "nn", "hyph-nn.tex", "norwegian nynorsk" },
- -- not supported
--- { "or", "hyph-or.tex", "oriya" },
- -- not supported
--- { "pa", "hyph-pa.tex", "panjabi" },
- -- not supported
--- { "", "hyph-.tex", "" },
- { "pl", "hyph-pl.tex", "polish" },
- { "pt", "hyph-pt.tex", "portuguese" },
- { "ro", "hyph-ro.tex", "romanian" },
- { "ru", "hyph-ru.tex", "russian" },
- -- not supported
--- { "sa", "hyph-sa.tex", "sanskrit" },
- { "sk", "hyph-sk.tex", "slovak" },
- { "sl", "hyph-sl.tex", "slovenian" },
- -- TODO: there is both Cyrillic and Latin script available
- { "sr", "hyph-sr-cyrl.tex", "serbian" },
- { "sv", "hyph-sv.tex", "swedish" },
- -- not supported
--- { "ta", "hyph-ta.tex", "tamil" },
- -- not supported
--- { "te", "hyph-te.tex", "telugu" },
- { "tk", "hyph-tk.tex", "turkmen" },
- { "tr", "hyph-tr.tex", "turkish" },
- { "uk", "hyph-uk.tex", "ukrainian" },
- { "zh", "hyph-zh-latn.tex", "zh-latn, chinese Pinyin" },
+ -- { "ar", "hyph-ar", "arabic" },
+ -- { "as", "hyph-as", "assamese" },
+ { "bg", "hyph-bg", "bulgarian" },
+ -- { "bn", "hyph-bn", "bengali" },
+ { "ca", "hyph-ca", "catalan" },
+ -- { "??", "hyph-cop", "coptic" },
+ { "cs", "hyph-cs", "czech" },
+ { "cy", "hyph-cy", "welsh" },
+ { "da", "hyph-da", "danish" },
+ { "deo", "hyph-de-1901", "german, old spelling" },
+ { "de", "hyph-de-1996", "german, new spelling" },
+ -- { "??", "hyph-de-ch-1901", "swiss german" },
+ -- { "??", "hyph-el-monoton", "greek" },
+ -- { "gr", "hyph-el-polyton", "greek" },
+ { "agr", "hyph-grc", "ancient greek", ignored_ancient_greek },
+ { "gb", "hyph-en-gb", "british english" },
+ { "us", "hyph-en-us", "american english" },
+ -- { "eo", "hyph-eo", "esperanto" },
+ { "es", "hyph-es", "spanish" },
+ { "et", "hyph-et", "estonian" },
+ { "eu", "hyph-eu", "basque" },
+ -- { "fa", "hyph-fa", "farsi" },
+ { "fi", "hyph-fi", "finnish" },
+ { "fr", "hyph-fr", "french", ignored_french },
+ -- { "??", "hyph-ga", "irish" },
+ -- { "??", "hyph-gl", "galician" },
+ -- { "gu", "hyph-gu", "gujarati" },
+ -- { "hi", "hyph-hi", "hindi" },
+ { "hr", "hyph-hr", "croatian" },
+ -- { "??", "hyph-hsb", "upper sorbian" },
+ { "hu", "hyph-hu", "hungarian" },
+ -- { "hy", "hyph-hy", "armenian" },
+ -- { "??", "hyph-ia", "interlingua" },
+ -- { "??", "hyph-id", "indonesian" },
+ { "is", "hyph-is", "icelandic" },
+ { "it", "hyph-it", "italian" },
+ -- { "??", "hyph-kmr", "kurmanji" },
+ -- { "kn", "hyph-kn", "kannada" },
+ { "la", "hyph-la", "latin" },
+ -- { "lo", "hyph-lo", "lao" },
+ { "lt", "hyph-lt", "lithuanian" },
+ { "lv", "hyph-lv", "latvian" },
+ -- { "ml", "hyph-ml", "..." },
+ { "mn", "hyph-mn-cyrl", "mongolian, cyrillic script" },
+ -- { "mr", "hyph-mr", "..." },
+ { "nb", "hyph-nb", "norwegian bokmål" },
+ { "nl", "hyph-nl", "dutch" },
+ { "nn", "hyph-nn", "norwegian nynorsk" },
+ -- { "or", "hyph-or", "oriya" },
+ -- { "pa", "hyph-pa", "panjabi" },
+ -- { "", "hyph-", "" },
+ { "pl", "hyph-pl", "polish" },
+ { "pt", "hyph-pt", "portuguese" },
+ { "ro", "hyph-ro", "romanian" },
+ { "ru", "hyph-ru", "russian" },
+ -- { "sa", "hyph-sa", "sanskrit" },
+ { "sk", "hyph-sk", "slovak" },
+ { "sl", "hyph-sl", "slovenian" },
+ { "sr", "hyph-sr-cyrl", "serbian" },
+ -- { "sr", "hyph-sr-latn", "serbian" },
+ { "sv", "hyph-sv", "swedish" },
+ -- { "ta", "hyph-ta", "tamil" },
+ -- { "te", "hyph-te", "telugu" },
+ { "tk", "hyph-tk", "turkmen" },
+ { "tr", "hyph-tr", "turkish" },
+ { "uk", "hyph-uk", "ukrainian" },
+ { "zh", "hyph-zh-latn", "zh-latn, chinese pinyin" },
}
-- stripped down from lpeg example:
@@ -105,189 +121,220 @@ function utf.check(str)
return lpeg.match(lpeg.patterns.validutf8,str)
end
-local permitted_commands = table.tohash {
- "message",
- "endinput"
-}
-
-local permitted_characters = table.tohash {
- 0x0009, -- tab
- 0x0027, -- apostrofe
- 0x002D, -- hyphen
- 0x200C, --
-}
+-- *.tex
+-- *.hyp.txt *.pat.txt *.lic.txt *.chr.txt
-function scripts.patterns.load(path,name,mnemonic,fullcheck)
+function scripts.patterns.load(path,name,mnemonic,ignored)
local fullname = file.join(path,name)
- local data = io.loaddata(fullname) or ""
- local byte, char = utf.byte, utf.char
- if data ~= "" then
- data = data:gsub("([\n\r])\\input ([^ \n\r]+)", function(previous,subname)
- local subname = file.addsuffix(subname,"tex")
- local subfull = file.join(file.dirname(fullname),subname)
- local subdata = io.loaddata(subfull) or ""
- if subdata == "" then
- if mnemonic then
- logs.simple("no subfile %s for language %s",subname,mnemonic)
- else
- logs.simple("no subfile %s",name)
+ local texfile = addsuffix(fullname,"tex")
+ local hypfile = addsuffix(fullname,"hyp.txt")
+ local patfile = addsuffix(fullname,"pat.txt")
+ local licfile = addsuffix(fullname,"lic.txt")
+ -- local chrfile = addsuffix(fullname,"chr.txt")
+ local okay = true
+ local hyphenations, patterns, comment, stripset = "", "", "", ""
+ local splitpatternsnew, splithyphenationsnew = { }, { }
+ local splitpatternsold, splithyphenationsold = { }, { }
+ local usedpatterncharacters, usedhyphenationcharacters = { }, { }
+ if lfs.isfile(patfile) then
+ logs.simple("using txt files %s.[hyp|pat|lic].txt",name)
+ comment, patterns, hyphenations = io.loaddata(licfile) or "", io.loaddata(patfile) or "", io.loaddata(hypfile) or ""
+ hypfile, patfile, licfile = hypfile, patfile, licfile
+ elseif lfs.isfile(texfile) then
+ logs.simple("using tex file %s.txt",name)
+ local data = io.loaddata(texfile) or ""
+ if data ~= "" then
+ data = gsub(data,"([\n\r])\\input ([^ \n\r]+)", function(previous,subname)
+ local subname = addsuffix(subname,"tex")
+ local subfull = file.join(file.dirname(texfile),subname)
+ local subdata = io.loaddata(subfull) or ""
+ if subdata == "" then
+ logs.simple("no subfile %s",subname)
end
- end
- return previous .. subdata
- end)
- local comment = data:match("^(.-)[\n\r]\\patterns") or ""
- local n, okay = 0, true
- local cd = characters.data
- for line in data:gmatch("[^ \n\r]+") do
- local ok = utf.check(line)
- n = n + 1
- if not ok then
- okay = false
- line = line:gsub("%%","%%%%")
- if fullcheck then
- if mnemonic then
- logs.simple("invalid utf in language %s, file %s, line %s: %s",mnemonic,name,n,line)
- else
- logs.simple("invalid utf in file %s, line %s: %s",name,n,line)
- end
- else
- if mnemonic then
- logs.simple("file %s for %s contains invalid utf",name,mnemonic)
- else
- logs.simple("file %s contains invalid utf",name)
+ return previous .. subdata
+ end)
+ data = gsub(data,"%%.-[\n\r]","")
+ data = gsub(data," *[\n\r]+","\n")
+ patterns = match(data,"\\patterns[%s]*{[%s]*(.-)[%s]*}") or ""
+ hyphenations = match(data,"\\hyphenation[%s]*{[%s]*(.-)[%s]*}") or ""
+ comment = match(data,"^(.-)[\n\r]\\patterns") or ""
+ else
+ okay = false
+ end
+ else
+ okay = false
+ end
+ if okay then
+ -- split on runs of whitespace into separate entries
+ local how = lpeg.patterns.whitespace^1
+ splitpatternsnew = lpeg.split(how,patterns)
+ splithyphenationsnew = lpeg.split(how,hyphenations)
+ end
+ if okay then
+ -- remove comments
+ local function check(data,splitdata,name)
+ if find(data,"%%") then
+ for i=1,#splitdata do
+ local line = splitdata[i]
+ if find(line,"%%") then
+ splitdata[i] = gsub(line,"%%.*$","")
+ logs.simple("removing comment: %s",line)
end
- break
end
end
end
- local c, h = { }, { }
- for line in data:gmatch("[^\n\r]+") do
- local txt, cmt = line:match("^(.-)%%(.*)$")
- if not txt then
- txt, cmt = line, ""
- end
- for s in txt:gmatch("\\([a-zA-Z]+)") do
- h[s] = (h[s] or 0) + 1
- end
- for s in cmt:gmatch("\\([a-zA-Z]+)") do
- c[s] = (c[s] or 0) + 1
- end
- end
- h.patterns = nil
- h.hyphenation = nil
- for k, v in next, h do
- if not permitted_commands[k] then okay = false end
- if mnemonic then
- logs.simple("command \\%s found in language %s, file %s, n=%s",k,mnemonic,name,v)
- else
- logs.simple("command \\%s found in file %s, n=%s",k,name,v)
+ check(patterns,splitpatternsnew,patfile)
+ check(hyphenations,splithyphenationsnew,hypfile)
+ end
+ if okay then
+ -- remove lines with commands
+ local function check(data,splitdata,name)
+ if find(data,"\\") then
+ for i=1,#splitdata do
+ local line = splitdata[i]
+ if find(line,"\\") then
+ splitdata[i] = ""
+ logs.simple("removing line with command: %s",line)
+ end
+ end
end
end
- if not environment.argument("fast") then
- for k, v in next, c do
- if mnemonic then
- logs.simple("command \\%s found in comment of language %s, file %s, n=%s",k,mnemonic,name,v)
- else
- logs.simple("command \\%s found in comment of file %s, n=%s",k,name,v)
+ check(patterns,splitpatternsnew,patfile)
+ check(hyphenations,splithyphenationsnew,hypfile)
+ end
+ if okay then
+ -- check for valid utf
+ local function check(data,splitdata,name)
+ for i=1,#splitdata do
+ local line = splitdata[i]
+ local ok = lpegmatch(validutf8,line)
+ if not ok then
+ splitdata[i] = ""
+ logs.simple("removing line with invalid utf: %s",line)
end
end
+ -- TODO: check for commands being used in comments (not implemented here)
end
- data = data:gsub("%%.-[\n\r]","")
- data = data:gsub(" *[\n\r]+","\n")
- local patterns = data:match("\\patterns[%s]*{[%s]*(.-)[%s]*}") or ""
- local hyphenations = data:match("\\hyphenation[%s]*{[%s]*(.-)[%s]*}") or ""
- patterns = patterns:gsub("[ \t]+","\n")
- hyphenations = hyphenations:gsub("[ \t]+","\n")
- local p, h = { }, { }
- local pats, hyps = { } , { }
- local pused, hused = { } , { }
- local period = byte(".")
- for line in patterns:gmatch("[^ \n\r]+") do
- local ok = true
- for b in line:utfvalues() do
- if b == period then
- -- ok
- else
- local ct = cd[b].category
- if ct == "lu" or ct == "ll" then
- pused[char(b)] = true
- elseif ct == "nd" then
- -- ok
+ check(patterns,splitpatternsnew,patfile)
+ check(hyphenations,splithyphenationsnew,hypfile)
+ end
+ if okay then
+ -- remove funny lines
+ local cd = characters.data
+ local stripped = { }
+ local function check(splitdata,special,name)
+ local used = { }
+ for i=1,#splitdata do
+ local line = splitdata[i]
+ for b in line:utfvalues() do -- could be an lpeg
+ if b == special then
+ -- not registered
+ elseif permitted_characters[b] then
+ used[char(b)] = true
else
- p[b] = (p[b] or 0) + 1
- ok = false
+ local cdb = cd[b]
+ if not cdb then
+ logs.simple("no entry in chardata for character %s (0x%04X)",char(b),b)
+ else
+ local ct = cd[b].category
+ if ct == "lu" or ct == "ll" then
+ used[char(b)] = true
+ elseif ct == "nd" then
+ -- number
+ else
+ logs.simple("removing line with suspected utf character %s (0x%04X), category %s: %s",char(b),b,ct,line)
+ splitdata[i] = ""
+ break
+ end
+ end
end
end
end
- if ok then
- pats[#pats+1] = line
- end
+ return used
end
- local hyphen = byte("-")
- for line in hyphenations:gmatch("[^ \n\r]+") do
- local ok = true
- for b in line:utfvalues() do
- if b == hyphen then
- -- ok
- else
- local ct = cd[b].category
- if ct == "lu" or ct == "ll" then
- hused[char(b)] = true
+ usedpatterncharacters = check(splitpatternsnew,byte("."))
+ usedhyphenationcharacters = check(splithyphenationsnew,byte("-"))
+ for k, v in next, stripped do
+ logs.simple("entries that contain character %s (0x%04X) have been omitted",char(k),k)
+ end
+ end
+ if okay then
+ local function stripped(what,ignored)
+ -- ignored (per language)
+ local p = nil
+ if ignored then
+ for k, v in next, ignored do
+ if p then
+ p = p + lpeg.P(char(k))
else
- h[b] = (h[b] or 0) + 1
- ok = false
+ p = lpeg.P(char(k))
end
end
+ p = lpeg.P{ p + 1 * lpeg.V(1) } -- anywhere
end
- if ok then
- hyps[#hyps+1] = line
- end
- end
- local stripped = { }
- for k, v in next, p do
- if mnemonic then
- logs.simple("invalid character %s (0x%04X) in patterns of language %s, file %s, n=%s",char(k),k,mnemonic,name,v)
- else
- logs.simple("invalid character %s (0x%04X) in patterns of file %s, n=%s",char(k),k,name,v)
+ -- replaced (all languages)
+ local r = nil
+ for k, v in next, replaced_whatever do
+ if r then
+ r = r + lpeg.P(k)/v
+ else
+ r = lpeg.P(k)/v
+ end
end
- if not permitted_characters[k] then
- okay = false
- else
- stripped[k] = true
+ r = lpeg.Cs((r + 1)^0)
+ local result = { }
+ for i=1,#what do
+ local line = what[i]
+ if p and lpegmatch(p,line) then
+ logs.simple("discarding conflicting pattern: %s",line)
+ else -- we can speed this up by testing for replacements in the string
+ local l = lpegmatch(r,line)
+ if l ~= line then
+ logs.simple("sanitizing pattern: %s -> %s (for old patterns)",line,l)
+ end
+ result[#result+1] = l
+ end
end
+ return result
end
- for k, v in next, h do
- if mnemonic then
- logs.simple("invalid character %s (0x%04X) in exceptions of language %s, file %s, n=%s",char(k),k,mnemonic,name,v)
- else
- logs.simple("invalid character %s (0x%04X) in exceptions of file %s, n=%s",char(k),k,name,v)
- end
- if not permitted_characters[k] then
- okay = false
- else
- stripped[k] = true
+
+ splitpatternsold = stripped(splitpatternsnew,ignored)
+ splithyphenationsold = stripped(splithyphenationsnew,ignored)
+
+ end
+ if okay then
+ -- discarding duplicates
+ local function check(data,splitdata,name)
+ local used, collected = { }, { }
+ for i=1,#splitdata do
+ local line = splitdata[i]
+ if line == "" then
+ -- discard
+ elseif used[line] then
+ -- discard
+ logs.simple("discarding duplicate pattern: %s",line)
+ else
+ used[line] = true
+ collected[#collected+1] = line
+ end
end
+ return collected
end
- local stripset = ""
- for k, v in next, stripped do
- logs.simple("entries that contain character %s will be omitted",char(k))
- stripset = stripset .. "%" .. char(k)
- end
- return okay, pats, hyps, comment, stripset, pused, hused
- else
- if mnemonic then
- logs.simple("no file %s for language %s",fullname,mnemonic)
- else
- logs.simple("no file %s",fullname)
- end
- return false, { }, { }, "", "", { }, { }
+ splitpatternsnew = check(patterns,splitpatternsnew,patfile)
+ splithyphenationsnew = check(hyphenations,splithyphenationsnew,hypfile)
+ splitpatternsold = check(patterns,splitpatternsold,patfile)
+ splithyphenationsold = check(hyphenations,splithyphenationsold,hypfile)
+ end
+ if not okay then
+ logs.simple("no valid file %s.*",name)
end
+ return okay, splitpatternsnew, splithyphenationsnew, splitpatternsold, splithyphenationsold, comment, stripset, usedpatterncharacters, usedhyphenationcharacters
end
-function scripts.patterns.save(destination,mnemonic,name,patterns,hyphenations,comment,stripped,pused,hused)
- local nofpatterns = #patterns
- local nofhyphenations = #hyphenations
- logs.simple("language %s has %s patterns and %s exceptions",mnemonic,nofpatterns,nofhyphenations)
+function scripts.patterns.save(destination,mnemonic,name,patternsnew,hyphenationsnew,patternsold,hyphenationsold,comment,stripped,pused,hused,ignored)
+ local nofpatternsnew, nofhyphenationsnew = #patternsnew, #hyphenationsnew
+ local nofpatternsold, nofhyphenationsold = #patternsold, #hyphenationsold
+ logs.simple("language %s has %s old and %s new patterns and %s old and %s new exceptions",mnemonic,nofpatternsold,nofpatternsnew,nofhyphenationsold,nofhyphenationsnew)
if mnemonic ~= "??" then
local pu = concat(table.sortedkeys(pused), " ")
local hu = concat(table.sortedkeys(hused), " ")
@@ -318,28 +365,28 @@ function scripts.patterns.save(destination,mnemonic,name,patterns,hyphenations,c
}
local patterndata, hyphenationdata
- if nofpatterns > 0 then
+ if nofpatternsnew > 0 then
patterndata = {
- n = nofpatterns,
- data = concat(patterns," ") or nil,
+ n = nofpatternsnew,
+ data = concat(patternsnew," ") or nil,
characters = concat(table.sortedkeys(pused),""),
minhyphenmin = 1, -- determined by pattern author
minhyphenmax = 1, -- determined by pattern author
}
else
patterndata = {
- n = nofpatterns,
+ n = 0,
}
end
- if nofhyphenations > 0 then
+ if nofhyphenationsnew > 0 then
hyphenationdata = {
- n = nofhyphenations,
- data = concat(hyphenations," "),
+ n = nofhyphenationsnew,
+ data = concat(hyphenationsnew," "),
characters = concat(table.sortedkeys(hused),""),
}
else
hyphenationdata = {
- n = nofhyphenations,
+ n = 0,
}
end
local data = {
@@ -358,8 +405,8 @@ function scripts.patterns.save(destination,mnemonic,name,patterns,hyphenations,c
os.remove(luafile)
io.savedata(rmefile,format("%s\n\n%s",topline,comment))
- io.savedata(patfile,format("%s\n\n%s\n\n%% used: %s\n\n\\patterns{\n%s}",topline,banner,pu,concat(patterns,"\n")))
- io.savedata(hypfile,format("%s\n\n%s\n\n%% used: %s\n\n\\hyphenation{\n%s}",topline,banner,hu,concat(hyphenations,"\n")))
+ io.savedata(patfile,format("%s\n\n%s\n\n%% used: %s\n\n\\patterns{\n%s}",topline,banner,pu,concat(patternsold,"\n")))
+ io.savedata(hypfile,format("%s\n\n%s\n\n%% used: %s\n\n\\hyphenation{\n%s}",topline,banner,hu,concat(hyphenationsold,"\n")))
io.savedata(luafile,table.serialize(data,true))
end
end
@@ -370,22 +417,16 @@ end
function scripts.patterns.check()
local path = environment.argument("path") or "."
- local found = false
local files = environment.files
+ local only = false
if #files > 0 then
- for i=1,#files do
- local name = files[i]
- logs.simple("checking language file %s", name)
- local okay = scripts.patterns.load(path,name,nil,not environment.argument("fast"))
- if #environment.files > 1 then
- logs.simple("")
- end
- end
- else
- for k, v in next, scripts.patterns.list do
- local mnemonic, name = v[1], v[2]
+ only = table.tohash(files)
+ end
+ for k, v in next, scripts.patterns.list do
+ local mnemonic, name, ignored = v[1], v[2], v[4]
+ if not only or only[mnemonic] then
logs.simple("checking language %s, file %s", mnemonic, name)
- local okay = scripts.patterns.load(path,name,mnemonic,not environment.argument("fast"))
+ local okay = scripts.patterns.load(path,name,mnemonic,ignored)
if not okay then
logs.simple("there are errors that need to be fixed")
end
@@ -403,16 +444,23 @@ function scripts.patterns.convert()
if path == destination then
logs.simple("source path and destination path should differ (use --path and/or --destination)")
else
+ local files = environment.files
+ local only = false
+ if #files > 0 then
+ only = table.tohash(files)
+ end
for k, v in next, scripts.patterns.list do
- local mnemonic, name = v[1], v[2]
- logs.simple("converting language %s, file %s", mnemonic, name)
- local okay, patterns, hyphenations, comment, stripped, pused, hused = scripts.patterns.load(path,name,false)
- if okay then
- scripts.patterns.save(destination,mnemonic,name,patterns,hyphenations,comment,stripped,pused,hused)
- else
- logs.simple("convertion aborted due to error(s)")
+ local mnemonic, name, ignored = v[1], v[2], v[4]
+ if not only or only[mnemonic] then
+ logs.simple("converting language %s, file %s", mnemonic, name)
+ local okay, patternsnew, hyphenationsnew, patternsold, hyphenationsold, comment, stripped, pused, hused = scripts.patterns.load(path,name,mnemonic,ignored)
+ if okay then
+ scripts.patterns.save(destination,mnemonic,name,patternsnew,hyphenationsnew,patternsold,hyphenationsold,comment,stripped,pused,hused,ignored)
+ else
+ logs.simple("convertion aborted due to error(s)")
+ end
+ logs.simple("")
end
- logs.simple("")
end
end
end
@@ -425,8 +473,6 @@ messages.help = [[
--check check pattern file (or those used by context when no file given)
--path source path where hyph-foo.tex files are stored
--destination destination path
-
---fast only report filenames, no lines
]]
if environment.argument("check") then
@@ -441,6 +487,5 @@ end
-- mtxrun --script pattern --check hyph-*.tex
-- mtxrun --script pattern --check --path=c:/data/develop/svn-hyphen/trunk/hyph-utf8/tex/generic/hyph-utf8/patterns
--- mtxrun --script pattern --check --fast --path=c:/data/develop/svn-hyphen/trunk/hyph-utf8/tex/generic/hyph-utf8/patterns
--- mtxrun --script pattern --convert --path=c:/data/develop/svn-hyphen/trunk/hyph-utf8/tex/generic/hyph-utf8/patterns --destination=e:/tmp/patterns
--- mtxrun --script pattern --convert --path=c:/data/develop/svn-hyphen/branches/luatex/hyph-utf8/tex/generic/hyph-utf8/patterns/tex --destination=e:/tmp/patterns
+-- mtxrun --script pattern --convert --path=c:/data/develop/svn-hyphen/trunk/hyph-utf8/tex/generic/hyph-utf8/patterns/tex --destination=e:/tmp/patterns
+-- mtxrun --script pattern --convert --path=c:/data/develop/svn-hyphen/trunk/hyph-utf8/tex/generic/hyph-utf8/patterns/txt --destination=e:/tmp/patterns