diff options
Diffstat (limited to 'tex/context/base/l-url.lua')
-rw-r--r-- | tex/context/base/l-url.lua | 124 |
1 files changed, 18 insertions, 106 deletions
diff --git a/tex/context/base/l-url.lua b/tex/context/base/l-url.lua index ab5002881..69f32cc39 100644 --- a/tex/context/base/l-url.lua +++ b/tex/context/base/l-url.lua @@ -9,7 +9,7 @@ if not modules then modules = { } end modules ['l-url'] = { local char, gmatch, gsub, format, byte, find = string.char, string.gmatch, string.gsub, string.format, string.byte, string.find local concat = table.concat local tonumber, type = tonumber, type -local P, C, R, S, Cs, Cc, Ct, Cf, Cg, V = lpeg.P, lpeg.C, lpeg.R, lpeg.S, lpeg.Cs, lpeg.Cc, lpeg.Ct, lpeg.Cf, lpeg.Cg, lpeg.V +local P, C, R, S, Cs, Cc, Ct = lpeg.P, lpeg.C, lpeg.R, lpeg.S, lpeg.Cs, lpeg.Cc, lpeg.Ct local lpegmatch, lpegpatterns, replacer = lpeg.match, lpeg.patterns, lpeg.replacer -- from wikipedia: @@ -42,19 +42,15 @@ local endofstring = P(-1) local hexdigit = R("09","AF","af") local plus = P("+") local nothing = Cc("") -local escapedchar = (percent * C(hexdigit * hexdigit)) / tochar -local escaped = (plus / " ") + escapedchar +local escaped = (plus / " ") + (percent * C(hexdigit * hexdigit) / tochar) -- we assume schemes with more than 1 character (in order to avoid problems with windows disks) -- we also assume that when we have a scheme, we also have an authority --- --- maybe we should already split the query (better for unescaping as = & can be part of a value local schemestr = Cs((escaped+(1-colon-slash-qmark-hash))^2) local authoritystr = Cs((escaped+(1- slash-qmark-hash))^0) local pathstr = Cs((escaped+(1- qmark-hash))^0) ------ querystr = Cs((escaped+(1- hash))^0) -local querystr = Cs(( (1- hash))^0) +local querystr = Cs((escaped+(1- hash))^0) local fragmentstr = Cs((escaped+(1- endofstring))^0) local scheme = schemestr * colon + nothing @@ -69,20 +65,11 @@ local parser = Ct(validurl) lpegpatterns.url = validurl lpegpatterns.urlsplitter = parser -local escapes = { } - -setmetatable(escapes, { __index = function(t,k) - local v = format("%%%02X",byte(k)) - t[k] = v - return v -end }) +local escapes = { } ; for i=0,255 do escapes[i] = format("%%%02X",i) end -local escaper = Cs((R("09","AZ","az")^1 + P(" ")/"%%20" + S("-./_")^1 + P(1) / escapes)^0) -- space happens most -local unescaper = Cs((escapedchar + 1)^0) +local escaper = Cs((R("09","AZ","az") + S("-./_") + P(1) / escapes)^0) -lpegpatterns.urlunescaped = escapedchar -lpegpatterns.urlescaper = escaper -lpegpatterns.urlunescaper = unescaper +lpegpatterns.urlescaper = escaper -- todo: reconsider Ct as we can as well have five return values (saves a table) -- so we can have two parsers, one with and one without @@ -94,12 +81,8 @@ end local isscheme = schemestr * colon * slash * slash -- this test also assumes authority local function hasscheme(str) - if str then - local scheme = lpegmatch(isscheme,str) -- at least one character - return scheme ~= "" and scheme or false - else - return false - end + local scheme = lpegmatch(isscheme,str) -- at least one character + return scheme ~= "" and scheme or false end --~ print(hasscheme("home:")) @@ -120,32 +103,10 @@ local rootbased = P("/") local barswapper = replacer("|",":") local backslashswapper = replacer("\\","/") --- queries: - -local equal = P("=") -local amp = P("&") -local key = Cs(((escapedchar+1)-equal )^0) -local value = Cs(((escapedchar+1)-amp -endofstring)^0) - -local splitquery = Cf ( Ct("") * P { "sequence", - sequence = V("pair") * (amp * V("pair"))^0, - pair = Cg(key * equal * value), -}, rawset) - --- hasher - local function hashed(str) -- not yet ok (/test?test) - if str == "" then - return { - scheme = "invalid", - original = str, - } - end local s = split(str) - local rawscheme = s[1] - local rawquery = s[4] - local somescheme = rawscheme ~= "" - local somequery = rawquery ~= "" + local somescheme = s[1] ~= "" + local somequery = s[4] ~= "" if not somescheme and not somequery then s = { scheme = "file", @@ -161,17 +122,14 @@ local function hashed(str) -- not yet ok (/test?test) local authority, path, filename = s[2], s[3] if authority == "" then filename = path - elseif path == "" then - filename = "" else filename = authority .. "/" .. path end s = { - scheme = rawscheme, + scheme = s[1], authority = authority, path = path, - query = lpegmatch(unescaper,rawquery), -- unescaped, but possible conflict with & and = - queries = lpegmatch(splitquery,rawquery), -- split first and then unescaped + query = s[4], fragment = s[5], original = str, noscheme = false, @@ -181,8 +139,6 @@ local function hashed(str) -- not yet ok (/test?test) return s end --- inspect(hashed("template://test")) - -- Here we assume: -- -- files: /// = relative @@ -233,61 +189,20 @@ function url.construct(hash) -- dodo: we need to escape ! return lpegmatch(escaper,concat(fullurl)) end -function url.filename(filename) -- why no lpeg here ? +function url.filename(filename) local t = hashed(filename) return (t.scheme == "file" and (gsub(t.path,"^/([a-zA-Z])([:|])/)","%1:"))) or filename end -local function escapestring(str) - return lpegmatch(escaper,str) -end - -url.escape = escapestring - --- function url.query(str) -- separator could be an option --- if type(str) == "string" then --- local t = { } --- for k, v in gmatch(str,"([^&=]*)=([^&=]*)") do --- t[k] = v --- end --- return t --- else --- return str --- end --- end - function url.query(str) if type(str) == "string" then - return lpegmatch(splitquery,str) or "" - else - return str - end -end - -function url.toquery(data) - local td = type(data) - if td == "string" then - return #str and escape(data) or nil -- beware of double escaping - elseif td == "table" then - if next(data) then - local t = { } - for k, v in next, data do - t[#t+1] = format("%s=%s",k,escapestring(v)) - end - return concat(t,"&") + local t = { } + for k, v in gmatch(str,"([^&=]*)=([^&=]*)") do + t[k] = v end + return t else - -- nil is a signal that no query - end -end - --- /test/ | /test | test/ | test => test - -function url.barepath(path) - if not path or path == "" then - return "" - else - return (gsub(path,"^/?(.-)/?$","%1")) + return str end end @@ -305,9 +220,6 @@ end --~ print(table.serialize(t)) --~ end ---~ inspect(url.hashed("http://www.pragma-ade.com/test%20test?test=test%20test&x=123%3d45")) ---~ inspect(url.hashed("http://www.pragma-ade.com/test%20test?test=test%20test&x=123%3d45")) - --~ test("sys:///./colo-rgb") --~ test("/data/site/output/q2p-develop/resources/ecaboperception4_res/topicresources/58313733/figuur-cow.jpg") |