diff options
Diffstat (limited to 'tex/context/base/l-url.lua')
-rw-r--r-- | tex/context/base/l-url.lua | 116 |
1 files changed, 83 insertions, 33 deletions
diff --git a/tex/context/base/l-url.lua b/tex/context/base/l-url.lua index e3e6f8130..43fe73d2b 100644 --- a/tex/context/base/l-url.lua +++ b/tex/context/base/l-url.lua @@ -6,9 +6,10 @@ if not modules then modules = { } end modules ['l-url'] = { license = "see context related readme files" } -local char, gmatch, gsub = string.char, string.gmatch, string.gsub +local char, gmatch, gsub, format, byte = string.char, string.gmatch, string.gsub, string.format, string.byte +local concat = table.concat local tonumber, type = tonumber, type -local lpegmatch = lpeg.match +local lpegmatch, lpegP, lpegC, lpegR, lpegS, lpegCs, lpegCc = lpeg.match, lpeg.P, lpeg.C, lpeg.R, lpeg.S, lpeg.Cs, lpeg.Cc -- from the spec (on the web): -- @@ -26,22 +27,35 @@ local function tochar(s) return char(tonumber(s,16)) end -local colon, qmark, hash, slash, percent, endofstring = lpeg.P(":"), lpeg.P("?"), lpeg.P("#"), lpeg.P("/"), lpeg.P("%"), lpeg.P(-1) +local colon, qmark, hash, slash, percent, endofstring = lpegP(":"), lpegP("?"), lpegP("#"), lpegP("/"), lpegP("%"), lpegP(-1) -local hexdigit = lpeg.R("09","AF","af") -local plus = lpeg.P("+") -local escaped = (plus / " ") + (percent * lpeg.C(hexdigit * hexdigit) / tochar) +local hexdigit = lpegR("09","AF","af") +local plus = lpegP("+") +local nothing = lpegCc("") +local escaped = (plus / " ") + (percent * lpegC(hexdigit * hexdigit) / tochar) -- we assume schemes with more than 1 character (in order to avoid problems with windows disks) -local scheme = lpeg.Cs((escaped+(1-colon-slash-qmark-hash))^2) * colon + lpeg.Cc("") -local authority = slash * slash * lpeg.Cs((escaped+(1- slash-qmark-hash))^0) + lpeg.Cc("") -local path = slash * lpeg.Cs((escaped+(1- qmark-hash))^0) + lpeg.Cc("") -local query = qmark * lpeg.Cs((escaped+(1- hash))^0) + lpeg.Cc("") -local fragment = hash * lpeg.Cs((escaped+(1- endofstring))^0) + lpeg.Cc("") +local scheme = lpegCs((escaped+(1-colon-slash-qmark-hash))^2) * colon + nothing +local authority = slash * slash * lpegCs((escaped+(1- slash-qmark-hash))^0) + nothing +local path = slash * lpegCs((escaped+(1- qmark-hash))^0) + nothing +local query = qmark * lpegCs((escaped+(1- hash))^0) + nothing +local fragment = hash * lpegCs((escaped+(1- endofstring))^0) + nothing local parser = lpeg.Ct(scheme * authority * path * query * fragment) +lpeg.patterns.urlsplitter = parser + +local escapes = { } + +for i=0,255 do + escapes[i] = format("%%%02X",i) +end + +local escaper = lpeg.Cs((lpegR("09","AZ","az") + lpegS("-./_") + lpegP(1) / escapes)^0) + +lpeg.patterns.urlescaper = escaper + -- todo: reconsider Ct as we can as well have five return values (saves a table) -- so we can have two parsers, one with and one without @@ -54,15 +68,27 @@ end function url.hashed(str) local s = url.split(str) local somescheme = s[1] ~= "" - return { - scheme = (somescheme and s[1]) or "file", - authority = s[2], - path = s[3], - query = s[4], - fragment = s[5], - original = str, - noscheme = not somescheme, - } + if not somescheme then + return { + scheme = "file", + authority = "", + path = str, + query = "", + fragment = "", + original = str, + noscheme = true, + } + else + return { + scheme = s[1], + authority = s[2], + path = s[3], + query = s[4], + fragment = s[5], + original = str, + noscheme = false, + } + end end function url.hasscheme(str) @@ -73,15 +99,25 @@ function url.addscheme(str,scheme) return (url.hasscheme(str) and str) or ((scheme or "file:///") .. str) end -function url.construct(hash) - local fullurl = hash.sheme .. "://".. hash.authority .. hash.path - if hash.query then - fullurl = fullurl .. "?".. hash.query +function url.construct(hash) -- dodo: we need to escape ! + local fullurl = { } + local scheme, authority, path, query, fragment = hash.scheme, hash.authority, hash.path, hash.query, hash.fragment + if scheme and scheme ~= "" then + fullurl[#fullurl+1] = scheme .. "://" + end + if authority and authority ~= "" then + fullurl[#fullurl+1] = authority end - if hash.fragment then - fullurl = fullurl .. "?".. hash.fragment + if path and path ~= "" then + fullurl[#fullurl+1] = "/" .. path end - return fullurl + if query and query ~= "" then + fullurl[#fullurl+1] = "?".. query + end + if fragment and fragment ~= "" then + fullurl[#fullurl+1] = "#".. fragment + end + return lpegmatch(escaper,concat(fullurl)) end function url.filename(filename) @@ -108,12 +144,27 @@ end --~ print(url.filename("/oeps.txt")) --~ from the spec on the web (sort of): ---~ ---~ function test(str) ---~ print(table.serialize(url.hashed(str))) + +--~ local function test(str) +--~ local t = url.hashed(str) +--~ t.constructed = url.construct(t) +--~ print(table.serialize(t)) --~ end ---~ ---~ test("%56pass%20words") + +--~ test("sys:///./colo-rgb") + +--~ test("/data/site/output/q2p-develop/resources/ecaboperception4_res/topicresources/58313733/figuur-cow.jpg") +--~ test("file:///M:/q2p/develop/output/q2p-develop/resources/ecaboperception4_res/topicresources/58313733") +--~ test("M:/q2p/develop/output/q2p-develop/resources/ecaboperception4_res/topicresources/58313733") +--~ test("file:///q2p/develop/output/q2p-develop/resources/ecaboperception4_res/topicresources/58313733") +--~ test("/q2p/develop/output/q2p-develop/resources/ecaboperception4_res/topicresources/58313733") + +--~ test("file:///cow%20with%20spaces") +--~ test("file:///cow%20with%20spaces.pdf") +--~ test("cow%20with%20spaces.pdf") +--~ test("some%20file") +--~ test("/etc/passwords") +--~ test("http://www.myself.com/some%20words.html") --~ test("file:///c:/oeps.txt") --~ test("file:///c|/oeps.txt") --~ test("file:///etc/oeps.txt") @@ -127,7 +178,6 @@ end --~ test("tel:+1-816-555-1212") --~ test("telnet://192.0.2.16:80/") --~ test("urn:oasis:names:specification:docbook:dtd:xml:4.1.2") ---~ test("/etc/passwords") --~ test("http://www.pragma-ade.com/spaced%20name") --~ test("zip:///oeps/oeps.zip#bla/bla.tex") |