diff options
Diffstat (limited to 'tex/context/base/l-url.lua')
-rw-r--r-- | tex/context/base/l-url.lua | 688 |
1 files changed, 344 insertions, 344 deletions
diff --git a/tex/context/base/l-url.lua b/tex/context/base/l-url.lua index 4624a0507..5cfeb252c 100644 --- a/tex/context/base/l-url.lua +++ b/tex/context/base/l-url.lua @@ -1,344 +1,344 @@ -if not modules then modules = { } end modules ['l-url'] = { - version = 1.001, - comment = "companion to luat-lib.mkiv", - author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", - copyright = "PRAGMA ADE / ConTeXt Development Team", - license = "see context related readme files" -} - -local char, format, byte = string.char, string.format, string.byte -local concat = table.concat -local tonumber, type = tonumber, type -local P, C, R, S, Cs, Cc, Ct, Cf, Cg, V = lpeg.P, lpeg.C, lpeg.R, lpeg.S, lpeg.Cs, lpeg.Cc, lpeg.Ct, lpeg.Cf, lpeg.Cg, lpeg.V -local lpegmatch, lpegpatterns, replacer = lpeg.match, lpeg.patterns, lpeg.replacer - --- from wikipedia: --- --- foo://username:password@example.com:8042/over/there/index.dtb?type=animal;name=narwhal#nose --- \_/ \_______________/ \_________/ \__/ \___/ \_/ \______________________/ \__/ --- | | | | | | | | --- | userinfo hostname port | | query fragment --- | \________________________________/\_____________|____|/ --- scheme | | | | --- | authority path | | --- | | | --- | path interpretable as filename --- | ___________|____________ | --- / \ / \ | --- urn:example:animal:ferret:nose interpretable as extension - -url = url or { } -local url = url - -local tochar = function(s) return char(tonumber(s,16)) end - -local colon = P(":") -local qmark = P("?") -local hash = P("#") -local slash = P("/") -local percent = P("%") -local endofstring = P(-1) - -local hexdigit = R("09","AF","af") -local plus = P("+") -local nothing = Cc("") -local escapedchar = (percent * C(hexdigit * hexdigit)) / tochar -local escaped = (plus / " ") + escapedchar - -local noslash = P("/") / "" - --- we assume schemes with more than 1 character (in order to avoid problems with windows disks) --- we also assume that when we have a scheme, we also have an authority --- --- maybe we should already split the query (better for unescaping as = & can be part of a value - -local schemestr = Cs((escaped+(1-colon-slash-qmark-hash))^2) -local authoritystr = Cs((escaped+(1- slash-qmark-hash))^0) -local pathstr = Cs((escaped+(1- qmark-hash))^0) ------ querystr = Cs((escaped+(1- hash))^0) -local querystr = Cs(( (1- hash))^0) -local fragmentstr = Cs((escaped+(1- endofstring))^0) - -local scheme = schemestr * colon + nothing -local authority = slash * slash * authoritystr + nothing -local path = slash * pathstr + nothing -local query = qmark * querystr + nothing -local fragment = hash * fragmentstr + nothing - -local validurl = scheme * authority * path * query * fragment -local parser = Ct(validurl) - -lpegpatterns.url = validurl -lpegpatterns.urlsplitter = parser - -local escapes = { } - -setmetatable(escapes, { __index = function(t,k) - local v = format("%%%02X",byte(k)) - t[k] = v - return v -end }) - -local escaper = Cs((R("09","AZ","az")^1 + P(" ")/"%%20" + S("-./_")^1 + P(1) / escapes)^0) -- space happens most -local unescaper = Cs((escapedchar + 1)^0) - -lpegpatterns.urlunescaped = escapedchar -lpegpatterns.urlescaper = escaper -lpegpatterns.urlunescaper = unescaper - --- todo: reconsider Ct as we can as well have five return values (saves a table) --- so we can have two parsers, one with and one without - -local function split(str) - return (type(str) == "string" and lpegmatch(parser,str)) or str -end - -local isscheme = schemestr * colon * slash * slash -- this test also assumes authority - -local function hasscheme(str) - if str then - local scheme = lpegmatch(isscheme,str) -- at least one character - return scheme ~= "" and scheme or false - else - return false - end -end - ---~ print(hasscheme("home:")) ---~ print(hasscheme("home://")) - --- todo: cache them - -local rootletter = R("az","AZ") - + S("_-+") -local separator = P("://") -local qualified = P(".")^0 * P("/") - + rootletter * P(":") - + rootletter^1 * separator - + rootletter^1 * P("/") -local rootbased = P("/") - + rootletter * P(":") - -local barswapper = replacer("|",":") -local backslashswapper = replacer("\\","/") - --- queries: - -local equal = P("=") -local amp = P("&") -local key = Cs(((escapedchar+1)-equal )^0) -local value = Cs(((escapedchar+1)-amp -endofstring)^0) - -local splitquery = Cf ( Ct("") * P { "sequence", - sequence = V("pair") * (amp * V("pair"))^0, - pair = Cg(key * equal * value), -}, rawset) - --- hasher - -local function hashed(str) -- not yet ok (/test?test) - if str == "" then - return { - scheme = "invalid", - original = str, - } - end - local s = split(str) - local rawscheme = s[1] - local rawquery = s[4] - local somescheme = rawscheme ~= "" - local somequery = rawquery ~= "" - if not somescheme and not somequery then - s = { - scheme = "file", - authority = "", - path = str, - query = "", - fragment = "", - original = str, - noscheme = true, - filename = str, - } - else -- not always a filename but handy anyway - local authority, path, filename = s[2], s[3] - if authority == "" then - filename = path - elseif path == "" then - filename = "" - else - filename = authority .. "/" .. path - end - s = { - scheme = rawscheme, - authority = authority, - path = path, - query = lpegmatch(unescaper,rawquery), -- unescaped, but possible conflict with & and = - queries = lpegmatch(splitquery,rawquery), -- split first and then unescaped - fragment = s[5], - original = str, - noscheme = false, - filename = filename, - } - end - return s -end - --- inspect(hashed("template://test")) - --- Here we assume: --- --- files: /// = relative --- files: //// = absolute (!) - ---~ table.print(hashed("file://c:/opt/tex/texmf-local")) -- c:/opt/tex/texmf-local ---~ table.print(hashed("file://opt/tex/texmf-local" )) -- opt/tex/texmf-local ---~ table.print(hashed("file:///opt/tex/texmf-local" )) -- opt/tex/texmf-local ---~ table.print(hashed("file:////opt/tex/texmf-local" )) -- /opt/tex/texmf-local ---~ table.print(hashed("file:///./opt/tex/texmf-local" )) -- ./opt/tex/texmf-local - ---~ table.print(hashed("c:/opt/tex/texmf-local" )) -- c:/opt/tex/texmf-local ---~ table.print(hashed("opt/tex/texmf-local" )) -- opt/tex/texmf-local ---~ table.print(hashed("/opt/tex/texmf-local" )) -- /opt/tex/texmf-local - -url.split = split -url.hasscheme = hasscheme -url.hashed = hashed - -function url.addscheme(str,scheme) -- no authority - if hasscheme(str) then - return str - elseif not scheme then - return "file:///" .. str - else - return scheme .. ":///" .. str - end -end - -function url.construct(hash) -- dodo: we need to escape ! - local fullurl, f = { }, 0 - local scheme, authority, path, query, fragment = hash.scheme, hash.authority, hash.path, hash.query, hash.fragment - if scheme and scheme ~= "" then - f = f + 1 ; fullurl[f] = scheme .. "://" - end - if authority and authority ~= "" then - f = f + 1 ; fullurl[f] = authority - end - if path and path ~= "" then - f = f + 1 ; fullurl[f] = "/" .. path - end - if query and query ~= "" then - f = f + 1 ; fullurl[f] = "?".. query - end - if fragment and fragment ~= "" then - f = f + 1 ; fullurl[f] = "#".. fragment - end - return lpegmatch(escaper,concat(fullurl)) -end - -local pattern = Cs(noslash * R("az","AZ") * (S(":|")/":") * noslash * P(1)^0) - -function url.filename(filename) - local spec = hashed(filename) - local path = spec.path - return (spec.scheme == "file" and path and lpegmatch(pattern,path)) or filename -end - --- print(url.filename("/c|/test")) --- print(url.filename("/c/test")) - -local function escapestring(str) - return lpegmatch(escaper,str) -end - -url.escape = escapestring - -function url.query(str) - if type(str) == "string" then - return lpegmatch(splitquery,str) or "" - else - return str - end -end - -function url.toquery(data) - local td = type(data) - if td == "string" then - return #str and escape(data) or nil -- beware of double escaping - elseif td == "table" then - if next(data) then - local t = { } - for k, v in next, data do - t[#t+1] = format("%s=%s",k,escapestring(v)) - end - return concat(t,"&") - end - else - -- nil is a signal that no query - end -end - --- /test/ | /test | test/ | test => test - -local pattern = Cs(noslash^0 * (1 - noslash * P(-1))^0) - -function url.barepath(path) - if not path or path == "" then - return "" - else - return lpegmatch(pattern,path) - end -end - --- print(url.barepath("/test"),url.barepath("test/"),url.barepath("/test/"),url.barepath("test")) --- print(url.barepath("/x/yz"),url.barepath("x/yz/"),url.barepath("/x/yz/"),url.barepath("x/yz")) - ---~ print(url.filename("file:///c:/oeps.txt")) ---~ print(url.filename("c:/oeps.txt")) ---~ print(url.filename("file:///oeps.txt")) ---~ print(url.filename("file:///etc/test.txt")) ---~ print(url.filename("/oeps.txt")) - ---~ from the spec on the web (sort of): - ---~ local function test(str) ---~ local t = url.hashed(str) ---~ t.constructed = url.construct(t) ---~ print(table.serialize(t)) ---~ end - ---~ inspect(url.hashed("http://www.pragma-ade.com/test%20test?test=test%20test&x=123%3d45")) ---~ inspect(url.hashed("http://www.pragma-ade.com/test%20test?test=test%20test&x=123%3d45")) - ---~ test("sys:///./colo-rgb") - ---~ test("/data/site/output/q2p-develop/resources/ecaboperception4_res/topicresources/58313733/figuur-cow.jpg") ---~ test("file:///M:/q2p/develop/output/q2p-develop/resources/ecaboperception4_res/topicresources/58313733") ---~ test("M:/q2p/develop/output/q2p-develop/resources/ecaboperception4_res/topicresources/58313733") ---~ test("file:///q2p/develop/output/q2p-develop/resources/ecaboperception4_res/topicresources/58313733") ---~ test("/q2p/develop/output/q2p-develop/resources/ecaboperception4_res/topicresources/58313733") - ---~ test("file:///cow%20with%20spaces") ---~ test("file:///cow%20with%20spaces.pdf") ---~ test("cow%20with%20spaces.pdf") ---~ test("some%20file") ---~ test("/etc/passwords") ---~ test("http://www.myself.com/some%20words.html") ---~ test("file:///c:/oeps.txt") ---~ test("file:///c|/oeps.txt") ---~ test("file:///etc/oeps.txt") ---~ test("file://./etc/oeps.txt") ---~ test("file:////etc/oeps.txt") ---~ test("ftp://ftp.is.co.za/rfc/rfc1808.txt") ---~ test("http://www.ietf.org/rfc/rfc2396.txt") ---~ test("ldap://[2001:db8::7]/c=GB?objectClass?one#what") ---~ test("mailto:John.Doe@example.com") ---~ test("news:comp.infosystems.www.servers.unix") ---~ test("tel:+1-816-555-1212") ---~ test("telnet://192.0.2.16:80/") ---~ test("urn:oasis:names:specification:docbook:dtd:xml:4.1.2") ---~ test("http://www.pragma-ade.com/spaced%20name") - ---~ test("zip:///oeps/oeps.zip#bla/bla.tex") ---~ test("zip:///oeps/oeps.zip?bla/bla.tex") - ---~ table.print(url.hashed("/test?test")) +if not modules then modules = { } end modules ['l-url'] = {
+ version = 1.001,
+ comment = "companion to luat-lib.mkiv",
+ author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
+ copyright = "PRAGMA ADE / ConTeXt Development Team",
+ license = "see context related readme files"
+}
+
+local char, format, byte = string.char, string.format, string.byte
+local concat = table.concat
+local tonumber, type = tonumber, type
+local P, C, R, S, Cs, Cc, Ct, Cf, Cg, V = lpeg.P, lpeg.C, lpeg.R, lpeg.S, lpeg.Cs, lpeg.Cc, lpeg.Ct, lpeg.Cf, lpeg.Cg, lpeg.V
+local lpegmatch, lpegpatterns, replacer = lpeg.match, lpeg.patterns, lpeg.replacer
+
+-- from wikipedia:
+--
+-- foo://username:password@example.com:8042/over/there/index.dtb?type=animal;name=narwhal#nose
+-- \_/ \_______________/ \_________/ \__/ \___/ \_/ \______________________/ \__/
+-- | | | | | | | |
+-- | userinfo hostname port | | query fragment
+-- | \________________________________/\_____________|____|/
+-- scheme | | | |
+-- | authority path | |
+-- | | |
+-- | path interpretable as filename
+-- | ___________|____________ |
+-- / \ / \ |
+-- urn:example:animal:ferret:nose interpretable as extension
+
+url = url or { }
+local url = url
+
+local tochar = function(s) return char(tonumber(s,16)) end
+
+local colon = P(":")
+local qmark = P("?")
+local hash = P("#")
+local slash = P("/")
+local percent = P("%")
+local endofstring = P(-1)
+
+local hexdigit = R("09","AF","af")
+local plus = P("+")
+local nothing = Cc("")
+local escapedchar = (percent * C(hexdigit * hexdigit)) / tochar
+local escaped = (plus / " ") + escapedchar
+
+local noslash = P("/") / ""
+
+-- we assume schemes with more than 1 character (in order to avoid problems with windows disks)
+-- we also assume that when we have a scheme, we also have an authority
+--
+-- maybe we should already split the query (better for unescaping as = & can be part of a value
+
+local schemestr = Cs((escaped+(1-colon-slash-qmark-hash))^2)
+local authoritystr = Cs((escaped+(1- slash-qmark-hash))^0)
+local pathstr = Cs((escaped+(1- qmark-hash))^0)
+----- querystr = Cs((escaped+(1- hash))^0)
+local querystr = Cs(( (1- hash))^0)
+local fragmentstr = Cs((escaped+(1- endofstring))^0)
+
+local scheme = schemestr * colon + nothing
+local authority = slash * slash * authoritystr + nothing
+local path = slash * pathstr + nothing
+local query = qmark * querystr + nothing
+local fragment = hash * fragmentstr + nothing
+
+local validurl = scheme * authority * path * query * fragment
+local parser = Ct(validurl)
+
+lpegpatterns.url = validurl
+lpegpatterns.urlsplitter = parser
+
+local escapes = { }
+
+setmetatable(escapes, { __index = function(t,k)
+ local v = format("%%%02X",byte(k))
+ t[k] = v
+ return v
+end })
+
+local escaper = Cs((R("09","AZ","az")^1 + P(" ")/"%%20" + S("-./_")^1 + P(1) / escapes)^0) -- space happens most
+local unescaper = Cs((escapedchar + 1)^0)
+
+lpegpatterns.urlunescaped = escapedchar
+lpegpatterns.urlescaper = escaper
+lpegpatterns.urlunescaper = unescaper
+
+-- todo: reconsider Ct as we can as well have five return values (saves a table)
+-- so we can have two parsers, one with and one without
+
+local function split(str)
+ return (type(str) == "string" and lpegmatch(parser,str)) or str
+end
+
+local isscheme = schemestr * colon * slash * slash -- this test also assumes authority
+
+local function hasscheme(str)
+ if str then
+ local scheme = lpegmatch(isscheme,str) -- at least one character
+ return scheme ~= "" and scheme or false
+ else
+ return false
+ end
+end
+
+--~ print(hasscheme("home:"))
+--~ print(hasscheme("home://"))
+
+-- todo: cache them
+
+local rootletter = R("az","AZ")
+ + S("_-+")
+local separator = P("://")
+local qualified = P(".")^0 * P("/")
+ + rootletter * P(":")
+ + rootletter^1 * separator
+ + rootletter^1 * P("/")
+local rootbased = P("/")
+ + rootletter * P(":")
+
+local barswapper = replacer("|",":")
+local backslashswapper = replacer("\\","/")
+
+-- queries:
+
+local equal = P("=")
+local amp = P("&")
+local key = Cs(((escapedchar+1)-equal )^0)
+local value = Cs(((escapedchar+1)-amp -endofstring)^0)
+
+local splitquery = Cf ( Ct("") * P { "sequence",
+ sequence = V("pair") * (amp * V("pair"))^0,
+ pair = Cg(key * equal * value),
+}, rawset)
+
+-- hasher
+
+local function hashed(str) -- not yet ok (/test?test)
+ if str == "" then
+ return {
+ scheme = "invalid",
+ original = str,
+ }
+ end
+ local s = split(str)
+ local rawscheme = s[1]
+ local rawquery = s[4]
+ local somescheme = rawscheme ~= ""
+ local somequery = rawquery ~= ""
+ if not somescheme and not somequery then
+ s = {
+ scheme = "file",
+ authority = "",
+ path = str,
+ query = "",
+ fragment = "",
+ original = str,
+ noscheme = true,
+ filename = str,
+ }
+ else -- not always a filename but handy anyway
+ local authority, path, filename = s[2], s[3]
+ if authority == "" then
+ filename = path
+ elseif path == "" then
+ filename = ""
+ else
+ filename = authority .. "/" .. path
+ end
+ s = {
+ scheme = rawscheme,
+ authority = authority,
+ path = path,
+ query = lpegmatch(unescaper,rawquery), -- unescaped, but possible conflict with & and =
+ queries = lpegmatch(splitquery,rawquery), -- split first and then unescaped
+ fragment = s[5],
+ original = str,
+ noscheme = false,
+ filename = filename,
+ }
+ end
+ return s
+end
+
+-- inspect(hashed("template://test"))
+
+-- Here we assume:
+--
+-- files: /// = relative
+-- files: //// = absolute (!)
+
+--~ table.print(hashed("file://c:/opt/tex/texmf-local")) -- c:/opt/tex/texmf-local
+--~ table.print(hashed("file://opt/tex/texmf-local" )) -- opt/tex/texmf-local
+--~ table.print(hashed("file:///opt/tex/texmf-local" )) -- opt/tex/texmf-local
+--~ table.print(hashed("file:////opt/tex/texmf-local" )) -- /opt/tex/texmf-local
+--~ table.print(hashed("file:///./opt/tex/texmf-local" )) -- ./opt/tex/texmf-local
+
+--~ table.print(hashed("c:/opt/tex/texmf-local" )) -- c:/opt/tex/texmf-local
+--~ table.print(hashed("opt/tex/texmf-local" )) -- opt/tex/texmf-local
+--~ table.print(hashed("/opt/tex/texmf-local" )) -- /opt/tex/texmf-local
+
+url.split = split
+url.hasscheme = hasscheme
+url.hashed = hashed
+
+function url.addscheme(str,scheme) -- no authority
+ if hasscheme(str) then
+ return str
+ elseif not scheme then
+ return "file:///" .. str
+ else
+ return scheme .. ":///" .. str
+ end
+end
+
+function url.construct(hash) -- dodo: we need to escape !
+ local fullurl, f = { }, 0
+ local scheme, authority, path, query, fragment = hash.scheme, hash.authority, hash.path, hash.query, hash.fragment
+ if scheme and scheme ~= "" then
+ f = f + 1 ; fullurl[f] = scheme .. "://"
+ end
+ if authority and authority ~= "" then
+ f = f + 1 ; fullurl[f] = authority
+ end
+ if path and path ~= "" then
+ f = f + 1 ; fullurl[f] = "/" .. path
+ end
+ if query and query ~= "" then
+ f = f + 1 ; fullurl[f] = "?".. query
+ end
+ if fragment and fragment ~= "" then
+ f = f + 1 ; fullurl[f] = "#".. fragment
+ end
+ return lpegmatch(escaper,concat(fullurl))
+end
+
+local pattern = Cs(noslash * R("az","AZ") * (S(":|")/":") * noslash * P(1)^0)
+
+function url.filename(filename)
+ local spec = hashed(filename)
+ local path = spec.path
+ return (spec.scheme == "file" and path and lpegmatch(pattern,path)) or filename
+end
+
+-- print(url.filename("/c|/test"))
+-- print(url.filename("/c/test"))
+
+local function escapestring(str)
+ return lpegmatch(escaper,str)
+end
+
+url.escape = escapestring
+
+function url.query(str)
+ if type(str) == "string" then
+ return lpegmatch(splitquery,str) or ""
+ else
+ return str
+ end
+end
+
+function url.toquery(data)
+ local td = type(data)
+ if td == "string" then
+ return #str and escape(data) or nil -- beware of double escaping
+ elseif td == "table" then
+ if next(data) then
+ local t = { }
+ for k, v in next, data do
+ t[#t+1] = format("%s=%s",k,escapestring(v))
+ end
+ return concat(t,"&")
+ end
+ else
+ -- nil is a signal that no query
+ end
+end
+
+-- /test/ | /test | test/ | test => test
+
+local pattern = Cs(noslash^0 * (1 - noslash * P(-1))^0)
+
+function url.barepath(path)
+ if not path or path == "" then
+ return ""
+ else
+ return lpegmatch(pattern,path)
+ end
+end
+
+-- print(url.barepath("/test"),url.barepath("test/"),url.barepath("/test/"),url.barepath("test"))
+-- print(url.barepath("/x/yz"),url.barepath("x/yz/"),url.barepath("/x/yz/"),url.barepath("x/yz"))
+
+--~ print(url.filename("file:///c:/oeps.txt"))
+--~ print(url.filename("c:/oeps.txt"))
+--~ print(url.filename("file:///oeps.txt"))
+--~ print(url.filename("file:///etc/test.txt"))
+--~ print(url.filename("/oeps.txt"))
+
+--~ from the spec on the web (sort of):
+
+--~ local function test(str)
+--~ local t = url.hashed(str)
+--~ t.constructed = url.construct(t)
+--~ print(table.serialize(t))
+--~ end
+
+--~ inspect(url.hashed("http://www.pragma-ade.com/test%20test?test=test%20test&x=123%3d45"))
+--~ inspect(url.hashed("http://www.pragma-ade.com/test%20test?test=test%20test&x=123%3d45"))
+
+--~ test("sys:///./colo-rgb")
+
+--~ test("/data/site/output/q2p-develop/resources/ecaboperception4_res/topicresources/58313733/figuur-cow.jpg")
+--~ test("file:///M:/q2p/develop/output/q2p-develop/resources/ecaboperception4_res/topicresources/58313733")
+--~ test("M:/q2p/develop/output/q2p-develop/resources/ecaboperception4_res/topicresources/58313733")
+--~ test("file:///q2p/develop/output/q2p-develop/resources/ecaboperception4_res/topicresources/58313733")
+--~ test("/q2p/develop/output/q2p-develop/resources/ecaboperception4_res/topicresources/58313733")
+
+--~ test("file:///cow%20with%20spaces")
+--~ test("file:///cow%20with%20spaces.pdf")
+--~ test("cow%20with%20spaces.pdf")
+--~ test("some%20file")
+--~ test("/etc/passwords")
+--~ test("http://www.myself.com/some%20words.html")
+--~ test("file:///c:/oeps.txt")
+--~ test("file:///c|/oeps.txt")
+--~ test("file:///etc/oeps.txt")
+--~ test("file://./etc/oeps.txt")
+--~ test("file:////etc/oeps.txt")
+--~ test("ftp://ftp.is.co.za/rfc/rfc1808.txt")
+--~ test("http://www.ietf.org/rfc/rfc2396.txt")
+--~ test("ldap://[2001:db8::7]/c=GB?objectClass?one#what")
+--~ test("mailto:John.Doe@example.com")
+--~ test("news:comp.infosystems.www.servers.unix")
+--~ test("tel:+1-816-555-1212")
+--~ test("telnet://192.0.2.16:80/")
+--~ test("urn:oasis:names:specification:docbook:dtd:xml:4.1.2")
+--~ test("http://www.pragma-ade.com/spaced%20name")
+
+--~ test("zip:///oeps/oeps.zip#bla/bla.tex")
+--~ test("zip:///oeps/oeps.zip?bla/bla.tex")
+
+--~ table.print(url.hashed("/test?test"))
|