summaryrefslogtreecommitdiff
path: root/tex/context/base/l-url.lua
diff options
context:
space:
mode:
Diffstat (limited to 'tex/context/base/l-url.lua')
-rw-r--r--tex/context/base/l-url.lua116
1 files changed, 83 insertions, 33 deletions
diff --git a/tex/context/base/l-url.lua b/tex/context/base/l-url.lua
index e3e6f8130..43fe73d2b 100644
--- a/tex/context/base/l-url.lua
+++ b/tex/context/base/l-url.lua
@@ -6,9 +6,10 @@ if not modules then modules = { } end modules ['l-url'] = {
license = "see context related readme files"
}
-local char, gmatch, gsub = string.char, string.gmatch, string.gsub
+local char, gmatch, gsub, format, byte = string.char, string.gmatch, string.gsub, string.format, string.byte
+local concat = table.concat
local tonumber, type = tonumber, type
-local lpegmatch = lpeg.match
+local lpegmatch, lpegP, lpegC, lpegR, lpegS, lpegCs, lpegCc = lpeg.match, lpeg.P, lpeg.C, lpeg.R, lpeg.S, lpeg.Cs, lpeg.Cc
-- from the spec (on the web):
--
@@ -26,22 +27,35 @@ local function tochar(s)
return char(tonumber(s,16))
end
-local colon, qmark, hash, slash, percent, endofstring = lpeg.P(":"), lpeg.P("?"), lpeg.P("#"), lpeg.P("/"), lpeg.P("%"), lpeg.P(-1)
+local colon, qmark, hash, slash, percent, endofstring = lpegP(":"), lpegP("?"), lpegP("#"), lpegP("/"), lpegP("%"), lpegP(-1)
-local hexdigit = lpeg.R("09","AF","af")
-local plus = lpeg.P("+")
-local escaped = (plus / " ") + (percent * lpeg.C(hexdigit * hexdigit) / tochar)
+local hexdigit = lpegR("09","AF","af")
+local plus = lpegP("+")
+local nothing = lpegCc("")
+local escaped = (plus / " ") + (percent * lpegC(hexdigit * hexdigit) / tochar)
-- we assume schemes with more than 1 character (in order to avoid problems with windows disks)
-local scheme = lpeg.Cs((escaped+(1-colon-slash-qmark-hash))^2) * colon + lpeg.Cc("")
-local authority = slash * slash * lpeg.Cs((escaped+(1- slash-qmark-hash))^0) + lpeg.Cc("")
-local path = slash * lpeg.Cs((escaped+(1- qmark-hash))^0) + lpeg.Cc("")
-local query = qmark * lpeg.Cs((escaped+(1- hash))^0) + lpeg.Cc("")
-local fragment = hash * lpeg.Cs((escaped+(1- endofstring))^0) + lpeg.Cc("")
+local scheme = lpegCs((escaped+(1-colon-slash-qmark-hash))^2) * colon + nothing
+local authority = slash * slash * lpegCs((escaped+(1- slash-qmark-hash))^0) + nothing
+local path = slash * lpegCs((escaped+(1- qmark-hash))^0) + nothing
+local query = qmark * lpegCs((escaped+(1- hash))^0) + nothing
+local fragment = hash * lpegCs((escaped+(1- endofstring))^0) + nothing
local parser = lpeg.Ct(scheme * authority * path * query * fragment)
+lpeg.patterns.urlsplitter = parser
+
+local escapes = { }
+
+for i=0,255 do
+ escapes[i] = format("%%%02X",i)
+end
+
+local escaper = lpeg.Cs((lpegR("09","AZ","az") + lpegS("-./_") + lpegP(1) / escapes)^0)
+
+lpeg.patterns.urlescaper = escaper
+
-- todo: reconsider Ct as we can as well have five return values (saves a table)
-- so we can have two parsers, one with and one without
@@ -54,15 +68,27 @@ end
function url.hashed(str)
local s = url.split(str)
local somescheme = s[1] ~= ""
- return {
- scheme = (somescheme and s[1]) or "file",
- authority = s[2],
- path = s[3],
- query = s[4],
- fragment = s[5],
- original = str,
- noscheme = not somescheme,
- }
+ if not somescheme then
+ return {
+ scheme = "file",
+ authority = "",
+ path = str,
+ query = "",
+ fragment = "",
+ original = str,
+ noscheme = true,
+ }
+ else
+ return {
+ scheme = s[1],
+ authority = s[2],
+ path = s[3],
+ query = s[4],
+ fragment = s[5],
+ original = str,
+ noscheme = false,
+ }
+ end
end
function url.hasscheme(str)
@@ -73,15 +99,25 @@ function url.addscheme(str,scheme)
return (url.hasscheme(str) and str) or ((scheme or "file:///") .. str)
end
-function url.construct(hash)
- local fullurl = hash.sheme .. "://".. hash.authority .. hash.path
- if hash.query then
- fullurl = fullurl .. "?".. hash.query
+function url.construct(hash) -- dodo: we need to escape !
+ local fullurl = { }
+ local scheme, authority, path, query, fragment = hash.scheme, hash.authority, hash.path, hash.query, hash.fragment
+ if scheme and scheme ~= "" then
+ fullurl[#fullurl+1] = scheme .. "://"
+ end
+ if authority and authority ~= "" then
+ fullurl[#fullurl+1] = authority
end
- if hash.fragment then
- fullurl = fullurl .. "?".. hash.fragment
+ if path and path ~= "" then
+ fullurl[#fullurl+1] = "/" .. path
end
- return fullurl
+ if query and query ~= "" then
+ fullurl[#fullurl+1] = "?".. query
+ end
+ if fragment and fragment ~= "" then
+ fullurl[#fullurl+1] = "#".. fragment
+ end
+ return lpegmatch(escaper,concat(fullurl))
end
function url.filename(filename)
@@ -108,12 +144,27 @@ end
--~ print(url.filename("/oeps.txt"))
--~ from the spec on the web (sort of):
---~
---~ function test(str)
---~ print(table.serialize(url.hashed(str)))
+
+--~ local function test(str)
+--~ local t = url.hashed(str)
+--~ t.constructed = url.construct(t)
+--~ print(table.serialize(t))
--~ end
---~
---~ test("%56pass%20words")
+
+--~ test("sys:///./colo-rgb")
+
+--~ test("/data/site/output/q2p-develop/resources/ecaboperception4_res/topicresources/58313733/figuur-cow.jpg")
+--~ test("file:///M:/q2p/develop/output/q2p-develop/resources/ecaboperception4_res/topicresources/58313733")
+--~ test("M:/q2p/develop/output/q2p-develop/resources/ecaboperception4_res/topicresources/58313733")
+--~ test("file:///q2p/develop/output/q2p-develop/resources/ecaboperception4_res/topicresources/58313733")
+--~ test("/q2p/develop/output/q2p-develop/resources/ecaboperception4_res/topicresources/58313733")
+
+--~ test("file:///cow%20with%20spaces")
+--~ test("file:///cow%20with%20spaces.pdf")
+--~ test("cow%20with%20spaces.pdf")
+--~ test("some%20file")
+--~ test("/etc/passwords")
+--~ test("http://www.myself.com/some%20words.html")
--~ test("file:///c:/oeps.txt")
--~ test("file:///c|/oeps.txt")
--~ test("file:///etc/oeps.txt")
@@ -127,7 +178,6 @@ end
--~ test("tel:+1-816-555-1212")
--~ test("telnet://192.0.2.16:80/")
--~ test("urn:oasis:names:specification:docbook:dtd:xml:4.1.2")
---~ test("/etc/passwords")
--~ test("http://www.pragma-ade.com/spaced%20name")
--~ test("zip:///oeps/oeps.zip#bla/bla.tex")