From 281539cf53b8ec43d72e06cbdba874b2de6e758d Mon Sep 17 00:00:00 2001 From: Philipp Gesang Date: Thu, 11 Dec 2014 21:03:53 +0100 Subject: sync with Context as of 2014-12-11 --- lualibs-boolean.lua | 6 +- lualibs-dir.lua | 4 +- lualibs-file.lua | 75 +++++- lualibs-lpeg.lua | 91 +++++++ lualibs-md5.lua | 64 +++-- lualibs-string.lua | 2 +- lualibs-table.lua | 2 +- lualibs-unicode.lua | 663 +++++++++++++++++++++++++++++---------------------- lualibs-url.lua | 68 +++--- lualibs-util-prs.lua | 24 +- lualibs-util-sta.lua | 22 +- lualibs-util-str.lua | 9 +- lualibs-util-tab.lua | 11 + lualibs-util-tpl.lua | 20 +- 14 files changed, 679 insertions(+), 382 deletions(-) diff --git a/lualibs-boolean.lua b/lualibs-boolean.lua index 8d11080..8f18d4c 100644 --- a/lualibs-boolean.lua +++ b/lualibs-boolean.lua @@ -57,11 +57,11 @@ function string.booleanstring(str) end end -function string.is_boolean(str,default) +function string.is_boolean(str,default,strict) if type(str) == "string" then - if str == "true" or str == "yes" or str == "on" or str == "t" or str == "1" then + if str == "true" or str == "yes" or str == "on" or str == "t" or (not strict and str == "1") then return true - elseif str == "false" or str == "no" or str == "off" or str == "f" or str == "0" then + elseif str == "false" or str == "no" or str == "off" or str == "f" or (not strict and str == "0") then return false end end diff --git a/lualibs-dir.lua b/lualibs-dir.lua index bcf28d0..c56af1b 100644 --- a/lualibs-dir.lua +++ b/lualibs-dir.lua @@ -156,7 +156,7 @@ end local function globpattern(path,patt,recurse,method) local kind = type(method) - if pattern and sub(patt,1,-3) == path then + if patt and sub(patt,1,-3) == path then patt = false end if kind == "function" then @@ -209,7 +209,7 @@ end dir.collectpattern = collectpattern -local separator +local separator, pattern if onwindows then -- we could sanitize here diff --git a/lualibs-file.lua b/lualibs-file.lua index c05372a..2742e99 100644 --- a/lualibs-file.lua +++ b/lualibs-file.lua @@ -385,31 +385,90 @@ local deslasher = lpeg.replacer(S("\\/")^1,"/") -- then we still have to deal with urls ... anyhow, multiple // are never a real -- problem but just ugly. -function file.join(...) - local lst = { ... } - local one = lst[1] +-- function file.join(...) +-- local lst = { ... } +-- local one = lst[1] +-- if lpegmatch(isnetwork,one) then +-- local one = lpegmatch(reslasher,one) +-- local two = lpegmatch(deslasher,concat(lst,"/",2)) +-- if lpegmatch(hasroot,two) then +-- return one .. two +-- else +-- return one .. "/" .. two +-- end +-- elseif lpegmatch(isroot,one) then +-- local two = lpegmatch(deslasher,concat(lst,"/",2)) +-- if lpegmatch(hasroot,two) then +-- return two +-- else +-- return "/" .. two +-- end +-- elseif one == "" then +-- return lpegmatch(stripper,concat(lst,"/",2)) +-- else +-- return lpegmatch(deslasher,concat(lst,"/")) +-- end +-- end + +function file.join(one, two, three, ...) + if not two then + return one == "" and one or lpegmatch(stripper,one) + end + if one == "" then + return lpegmatch(stripper,three and concat({ two, three, ... },"/") or two) + end if lpegmatch(isnetwork,one) then local one = lpegmatch(reslasher,one) - local two = lpegmatch(deslasher,concat(lst,"/",2)) + local two = lpegmatch(deslasher,three and concat({ two, three, ... },"/") or two) if lpegmatch(hasroot,two) then return one .. two else return one .. "/" .. two end elseif lpegmatch(isroot,one) then - local two = lpegmatch(deslasher,concat(lst,"/",2)) + local two = lpegmatch(deslasher,three and concat({ two, three, ... },"/") or two) if lpegmatch(hasroot,two) then return two else return "/" .. two end - elseif one == "" then - return lpegmatch(stripper,concat(lst,"/",2)) else - return lpegmatch(deslasher,concat(lst,"/")) + return lpegmatch(deslasher,concat({ one, two, three, ... },"/")) end end +-- or we can use this: +-- +-- function file.join(...) +-- local n = select("#",...) +-- local one = select(1,...) +-- if n == 1 then +-- return one == "" and one or lpegmatch(stripper,one) +-- end +-- if one == "" then +-- return lpegmatch(stripper,n > 2 and concat({ ... },"/",2) or select(2,...)) +-- end +-- if lpegmatch(isnetwork,one) then +-- local one = lpegmatch(reslasher,one) +-- local two = lpegmatch(deslasher,n > 2 and concat({ ... },"/",2) or select(2,...)) +-- if lpegmatch(hasroot,two) then +-- return one .. two +-- else +-- return one .. "/" .. two +-- end +-- elseif lpegmatch(isroot,one) then +-- local two = lpegmatch(deslasher,n > 2 and concat({ ... },"/",2) or select(2,...)) +-- if lpegmatch(hasroot,two) then +-- return two +-- else +-- return "/" .. two +-- end +-- else +-- return lpegmatch(deslasher,concat({ ... },"/")) +-- end +-- end + +-- print(file.join("c:/whatever")) -- print(file.join("c:/whatever","name")) -- print(file.join("//","/y")) -- print(file.join("/","/y")) diff --git a/lualibs-lpeg.lua b/lualibs-lpeg.lua index f3fd28b..192e32f 100644 --- a/lualibs-lpeg.lua +++ b/lualibs-lpeg.lua @@ -145,6 +145,9 @@ patterns.utfbom_8 = utfbom_8 patterns.utf_16_be_nl = P("\000\r\000\n") + P("\000\r") + P("\000\n") -- P("\000\r") * (P("\000\n") + P(true)) + P("\000\n") patterns.utf_16_le_nl = P("\r\000\n\000") + P("\r\000") + P("\n\000") -- P("\r\000") * (P("\n\000") + P(true)) + P("\n\000") +patterns.utf_32_be_nl = P("\000\000\000\r\000\000\000\n") + P("\000\000\000\r") + P("\000\000\000\n") +patterns.utf_32_le_nl = P("\r\000\000\000\n\000\000\000") + P("\r\000\000\000") + P("\n\000\000\000") + patterns.utf8one = R("\000\127") patterns.utf8two = R("\194\223") * utf8next patterns.utf8three = R("\224\239") * utf8next * utf8next @@ -183,10 +186,26 @@ local fullstripper = whitespace^0 * C((whitespace^0 * nonwhitespace^1)^0) ----- collapser = Cs(spacer^0/"" * ((spacer^1 * endofstring / "") + (spacer^1/" ") + P(1))^0) local collapser = Cs(spacer^0/"" * nonspacer^0 * ((spacer^0/" " * nonspacer^1)^0)) +local b_collapser = Cs( whitespace^0 /"" * (nonwhitespace^1 + whitespace^1/" ")^0) +local e_collapser = Cs((whitespace^1 * P(-1)/"" + nonwhitespace^1 + whitespace^1/" ")^0) +local m_collapser = Cs( (nonwhitespace^1 + whitespace^1/" ")^0) + +local b_stripper = Cs( spacer^0 /"" * (nonspacer^1 + spacer^1/" ")^0) +local e_stripper = Cs((spacer^1 * P(-1)/"" + nonspacer^1 + spacer^1/" ")^0) +local m_stripper = Cs( (nonspacer^1 + spacer^1/" ")^0) + patterns.stripper = stripper patterns.fullstripper = fullstripper patterns.collapser = collapser +patterns.b_collapser = b_collapser +patterns.m_collapser = m_collapser +patterns.e_collapser = e_collapser + +patterns.b_stripper = b_stripper +patterns.m_stripper = m_stripper +patterns.e_stripper = e_stripper + patterns.lowercase = lowercase patterns.uppercase = uppercase patterns.letter = patterns.lowercase + patterns.uppercase @@ -1014,3 +1033,75 @@ lpeg.patterns.stripzeros = stripper -- lpegmatch(stripper,str) -- print(#str, os.clock()-ts, lpegmatch(stripper,sample)) +-- for practical reasone we keep this here: + +local byte_to_HEX = { } +local byte_to_hex = { } +local byte_to_dec = { } -- for md5 +local hex_to_byte = { } + +for i=0,255 do + local H = format("%02X",i) + local h = format("%02x",i) + local d = format("%03i",i) + local c = char(i) + byte_to_HEX[c] = H + byte_to_hex[c] = h + byte_to_dec[c] = d + hex_to_byte[h] = c + hex_to_byte[H] = c +end + +local hextobyte = P(2)/hex_to_byte +local bytetoHEX = P(1)/byte_to_HEX +local bytetohex = P(1)/byte_to_hex +local bytetodec = P(1)/byte_to_dec +local hextobytes = Cs(hextobyte^0) +local bytestoHEX = Cs(bytetoHEX^0) +local bytestohex = Cs(bytetohex^0) +local bytestodec = Cs(bytetodec^0) + +patterns.hextobyte = hextobyte +patterns.bytetoHEX = bytetoHEX +patterns.bytetohex = bytetohex +patterns.bytetodec = bytetodec +patterns.hextobytes = hextobytes +patterns.bytestoHEX = bytestoHEX +patterns.bytestohex = bytestohex +patterns.bytestodec = bytestodec + +function string.toHEX(s) + if not s or s == "" then + return s + else + return lpegmatch(bytestoHEX,s) + end +end + +function string.tohex(s) + if not s or s == "" then + return s + else + return lpegmatch(bytestohex,s) + end +end + +function string.todec(s) + if not s or s == "" then + return s + else + return lpegmatch(bytestodec,s) + end +end + +function string.tobytes(s) + if not s or s == "" then + return s + else + return lpegmatch(hextobytes,s) + end +end + +-- local h = "ADFE0345" +-- local b = lpegmatch(patterns.hextobytes,h) +-- print(h,b,string.tohex(b),string.toHEX(b)) diff --git a/lualibs-md5.lua b/lualibs-md5.lua index 8ac20a5..00272c8 100644 --- a/lualibs-md5.lua +++ b/lualibs-md5.lua @@ -19,48 +19,38 @@ if not md5 then end local md5, file = md5, file -local gsub, format, byte = string.gsub, string.format, string.byte -local md5sum = md5.sum +local gsub = string.gsub -local function convert(str,fmt) - return (gsub(md5sum(str),".",function(chr) return format(fmt,byte(chr)) end)) -end - -if not md5.HEX then function md5.HEX(str) return convert(str,"%02X") end end -if not md5.hex then function md5.hex(str) return convert(str,"%02x") end end -if not md5.dec then function md5.dec(str) return convert(str,"%03i") end end - --- local P, Cs, lpegmatch = lpeg.P, lpeg.Cs,lpeg.match --- --- if not md5.HEX then --- local function remap(chr) return format("%02X",byte(chr)) end --- function md5.HEX(str) return (gsub(md5.sum(str),".",remap)) end --- end +-- local gsub, format, byte = string.gsub, string.format, string.byte -- --- if not md5.hex then --- local function remap(chr) return format("%02x",byte(chr)) end --- function md5.hex(str) return (gsub(md5.sum(str),".",remap)) end +-- local function convert(str,fmt) +-- return (gsub(md5sum(str),".",function(chr) return format(fmt,byte(chr)) end)) -- end -- --- if not md5.dec then --- local function remap(chr) return format("%03i",byte(chr)) end --- function md5.dec(str) return (gsub(md5.sum(str),".",remap)) end --- end +-- if not md5.HEX then function md5.HEX(str) return convert(str,"%02X") end end +-- if not md5.hex then function md5.hex(str) return convert(str,"%02x") end end +-- if not md5.dec then function md5.dec(str) return convert(str,"%03i") end end --- if not md5.HEX then --- local pattern_HEX = Cs( ( P(1) / function(chr) return format("%02X",byte(chr)) end)^0 ) --- function md5.HEX(str) return lpegmatch(pattern_HEX,md5.sum(str)) end --- end --- --- if not md5.hex then --- local pattern_hex = Cs( ( P(1) / function(chr) return format("%02x",byte(chr)) end)^0 ) --- function md5.hex(str) return lpegmatch(pattern_hex,md5.sum(str)) end --- end --- --- if not md5.dec then --- local pattern_dec = Cs( ( P(1) / function(chr) return format("%02i",byte(chr)) end)^0 ) --- function md5.dec(str) return lpegmatch(pattern_dec,md5.sum(str)) end --- end +do + + local patterns = lpeg and lpeg.patterns + + if patterns then + + local bytestoHEX = patterns.bytestoHEX + local bytestohex = patterns.bytestohex + local bytestodec = patterns.bytestodec + + local lpegmatch = lpeg.match + local md5sum = md5.sum + + if not md5.HEX then function md5.HEX(str) if str then return lpegmatch(bytestoHEX,md5sum(str)) end end end + if not md5.hex then function md5.hex(str) if str then return lpegmatch(bytestohex,md5sum(str)) end end end + if not md5.dec then function md5.dec(str) if str then return lpegmatch(bytestodec,md5sum(str)) end end end + + end + +end function file.needsupdating(oldname,newname,threshold) -- size modification access change local oldtime = lfs.attributes(oldname,"modification") diff --git a/lualibs-string.lua b/lualibs-string.lua index 3b1a000..70c66f6 100644 --- a/lualibs-string.lua +++ b/lualibs-string.lua @@ -94,7 +94,7 @@ end -- return not find(str,"%S") -- end -local pattern = P(" ")^0 * P(-1) +local pattern = P(" ")^0 * P(-1) -- maybe also newlines -- patterns.onlyspaces = pattern diff --git a/lualibs-table.lua b/lualibs-table.lua index e642106..3eb8b85 100644 --- a/lualibs-table.lua +++ b/lualibs-table.lua @@ -54,7 +54,7 @@ local function compare(a,b) if ta == tb then return a < b else - return tostring(a) < tostring(b) + return tostring(a) < tostring(b) -- not that efficient end end diff --git a/lualibs-unicode.lua b/lualibs-unicode.lua index fb4ea37..02dd1a0 100644 --- a/lualibs-unicode.lua +++ b/lualibs-unicode.lua @@ -56,7 +56,6 @@ local p_utfbom = patterns.utfbom local p_newline = patterns.newline local p_whitespace = patterns.whitespace - if not unicode then unicode = { utf = utf } -- for a while @@ -525,23 +524,59 @@ end -- end, pattern -- end -function utf.remapper(mapping) - local pattern = type(mapping) == "table" and tabletopattern(mapping) or p_utf8char - local pattern = Cs((pattern/mapping + p_utf8char)^0) - return function(str) - if not str or str == "" then - return "" +function utf.remapper(mapping,option) -- static also returns a pattern + local variant = type(mapping) + if variant == "table" then + if option == "dynamic" then + local pattern = false + table.setmetatablenewindex(mapping,function(t,k,v) rawset(t,k,v) pattern = false end) + return function(str) + if not str or str == "" then + return "" + else + if not pattern then + pattern = Cs((tabletopattern(mapping)/mapping + p_utf8char)^0) + end + return lpegmatch(pattern,str) + end + end + elseif option == "pattern" then + return Cs((tabletopattern(mapping)/mapping + p_utf8char)^0) + -- elseif option == "static" then + else + local pattern = Cs((tabletopattern(mapping)/mapping + p_utf8char)^0) + return function(str) + if not str or str == "" then + return "" + else + return lpegmatch(pattern,str) + end + end, pattern + end + elseif variant == "function" then + if option == "pattern" then + return Cs((p_utf8char/mapping + p_utf8char)^0) else - return lpegmatch(pattern,str) + local pattern = Cs((p_utf8char/mapping + p_utf8char)^0) + return function(str) + if not str or str == "" then + return "" + else + return lpegmatch(pattern,str) + end + end, pattern end - end, pattern + else + -- is actually an error + return function(str) + return str or "" + end + end end -- local remap = utf.remapper { a = 'd', b = "c", c = "b", d = "a" } -- print(remap("abcd 1234 abcd")) --- - function utf.replacer(t) -- no precheck, always string builder local r = replacer(t,false,false,true) return function(str) @@ -647,285 +682,359 @@ end local utf16_to_utf8_be, utf16_to_utf8_le local utf32_to_utf8_be, utf32_to_utf8_le -local utf_16_be_linesplitter = patterns.utfbom_16_be^-1 * lpeg.tsplitat(patterns.utf_16_be_nl) -local utf_16_le_linesplitter = patterns.utfbom_16_le^-1 * lpeg.tsplitat(patterns.utf_16_le_nl) +local utf_16_be_getbom = patterns.utfbom_16_be^-1 +local utf_16_le_getbom = patterns.utfbom_16_le^-1 +local utf_32_be_getbom = patterns.utfbom_32_be^-1 +local utf_32_le_getbom = patterns.utfbom_32_le^-1 + +local utf_16_be_linesplitter = utf_16_be_getbom * lpeg.tsplitat(patterns.utf_16_be_nl) +local utf_16_le_linesplitter = utf_16_le_getbom * lpeg.tsplitat(patterns.utf_16_le_nl) +local utf_32_be_linesplitter = utf_32_be_getbom * lpeg.tsplitat(patterns.utf_32_be_nl) +local utf_32_le_linesplitter = utf_32_le_getbom * lpeg.tsplitat(patterns.utf_32_le_nl) + +-- we have three possibilities: bytepairs (using tables), gmatch (using tables), gsub and +-- lpeg. Bytepairs are the fastert but as soon as we need to remove bombs and so the gain +-- is less due to more testing. Also, we seldom have to convert utf16 so we don't care to +-- much about a few milliseconds more runtime. The lpeg variant is upto 20% slower but +-- still pretty fast. +-- +-- for historic resone we keep the bytepairs variants around .. beware they don't grab the +-- bom like the lpegs do so they're not dropins in the functions that follow +-- +-- utf16_to_utf8_be = function(s) +-- if not s then +-- return nil +-- elseif s == "" then +-- return "" +-- end +-- local result, r, more = { }, 0, 0 +-- for left, right in bytepairs(s) do +-- if right then +-- local now = 256*left + right +-- if more > 0 then +-- now = (more-0xD800)*0x400 + (now-0xDC00) + 0x10000 -- the 0x10000 smells wrong +-- more = 0 +-- r = r + 1 +-- result[r] = utfchar(now) +-- elseif now >= 0xD800 and now <= 0xDBFF then +-- more = now +-- else +-- r = r + 1 +-- result[r] = utfchar(now) +-- end +-- end +-- end +-- return concat(result) +-- end +-- +-- local utf16_to_utf8_be_t = function(t) +-- if not t then +-- return nil +-- elseif type(t) == "string" then +-- t = lpegmatch(utf_16_be_linesplitter,t) +-- end +-- local result = { } -- we reuse result +-- for i=1,#t do +-- local s = t[i] +-- if s ~= "" then +-- local r, more = 0, 0 +-- for left, right in bytepairs(s) do +-- if right then +-- local now = 256*left + right +-- if more > 0 then +-- now = (more-0xD800)*0x400 + (now-0xDC00) + 0x10000 -- the 0x10000 smells wrong +-- more = 0 +-- r = r + 1 +-- result[r] = utfchar(now) +-- elseif now >= 0xD800 and now <= 0xDBFF then +-- more = now +-- else +-- r = r + 1 +-- result[r] = utfchar(now) +-- end +-- end +-- end +-- t[i] = concat(result,"",1,r) -- we reused tmp, hence t +-- end +-- end +-- return t +-- end +-- +-- utf16_to_utf8_le = function(s) +-- if not s then +-- return nil +-- elseif s == "" then +-- return "" +-- end +-- local result, r, more = { }, 0, 0 +-- for left, right in bytepairs(s) do +-- if right then +-- local now = 256*right + left +-- if more > 0 then +-- now = (more-0xD800)*0x400 + (now-0xDC00) + 0x10000 -- the 0x10000 smells wrong +-- more = 0 +-- r = r + 1 +-- result[r] = utfchar(now) +-- elseif now >= 0xD800 and now <= 0xDBFF then +-- more = now +-- else +-- r = r + 1 +-- result[r] = utfchar(now) +-- end +-- end +-- end +-- return concat(result) +-- end +-- +-- local utf16_to_utf8_le_t = function(t) +-- if not t then +-- return nil +-- elseif type(t) == "string" then +-- t = lpegmatch(utf_16_le_linesplitter,t) +-- end +-- local result = { } -- we reuse result +-- for i=1,#t do +-- local s = t[i] +-- if s ~= "" then +-- local r, more = 0, 0 +-- for left, right in bytepairs(s) do +-- if right then +-- local now = 256*right + left +-- if more > 0 then +-- now = (more-0xD800)*0x400 + (now-0xDC00) + 0x10000 -- the 0x10000 smells wrong +-- more = 0 +-- r = r + 1 +-- result[r] = utfchar(now) +-- elseif now >= 0xD800 and now <= 0xDBFF then +-- more = now +-- else +-- r = r + 1 +-- result[r] = utfchar(now) +-- end +-- end +-- end +-- t[i] = concat(result,"",1,r) -- we reused tmp, hence t +-- end +-- end +-- return t +-- end +-- +-- local utf32_to_utf8_be_t = function(t) +-- if not t then +-- return nil +-- elseif type(t) == "string" then +-- t = lpegmatch(utflinesplitter,t) +-- end +-- local result = { } -- we reuse result +-- for i=1,#t do +-- local r, more = 0, -1 +-- for a,b in bytepairs(t[i]) do +-- if a and b then +-- if more < 0 then +-- more = 256*256*256*a + 256*256*b +-- else +-- r = r + 1 +-- result[t] = utfchar(more + 256*a + b) +-- more = -1 +-- end +-- else +-- break +-- end +-- end +-- t[i] = concat(result,"",1,r) +-- end +-- return t +-- end +-- +-- local utf32_to_utf8_le_t = function(t) +-- if not t then +-- return nil +-- elseif type(t) == "string" then +-- t = lpegmatch(utflinesplitter,t) +-- end +-- local result = { } -- we reuse result +-- for i=1,#t do +-- local r, more = 0, -1 +-- for a,b in bytepairs(t[i]) do +-- if a and b then +-- if more < 0 then +-- more = 256*b + a +-- else +-- r = r + 1 +-- result[t] = utfchar(more + 256*256*256*b + 256*256*a) +-- more = -1 +-- end +-- else +-- break +-- end +-- end +-- t[i] = concat(result,"",1,r) +-- end +-- return t +-- end + +local more = 0 + +local p_utf16_to_utf8_be = C(1) * C(1) /function(left,right) + local now = 256*byte(left) + byte(right) + if more > 0 then + now = (more-0xD800)*0x400 + (now-0xDC00) + 0x10000 -- the 0x10000 smells wrong + more = 0 + return utfchar(now) + elseif now >= 0xD800 and now <= 0xDBFF then + more = now + return "" -- else the c's end up in the stream + else + return utfchar(now) + end +end --- we have three possibilities: +local p_utf16_to_utf8_le = C(1) * C(1) /function(right,left) + local now = 256*byte(left) + byte(right) + if more > 0 then + now = (more-0xD800)*0x400 + (now-0xDC00) + 0x10000 -- the 0x10000 smells wrong + more = 0 + return utfchar(now) + elseif now >= 0xD800 and now <= 0xDBFF then + more = now + return "" -- else the c's end up in the stream + else + return utfchar(now) + end +end +local p_utf32_to_utf8_be = C(1) * C(1) * C(1) * C(1) /function(a,b,c,d) + return utfchar(256*256*256*byte(a) + 256*256*byte(b) + 256*byte(c) + byte(d)) +end --- bytepairs: 0.048 --- gmatch : 0.069 --- lpeg : 0.089 (match time captures) +local p_utf32_to_utf8_le = C(1) * C(1) * C(1) * C(1) /function(a,b,c,d) + return utfchar(256*256*256*byte(d) + 256*256*byte(c) + 256*byte(b) + byte(a)) +end -if bytepairs then +p_utf16_to_utf8_be = P(true) / function() more = 0 end * utf_16_be_getbom * Cs(p_utf16_to_utf8_be^0) +p_utf16_to_utf8_le = P(true) / function() more = 0 end * utf_16_le_getbom * Cs(p_utf16_to_utf8_le^0) +p_utf32_to_utf8_be = P(true) / function() more = 0 end * utf_32_be_getbom * Cs(p_utf32_to_utf8_be^0) +p_utf32_to_utf8_le = P(true) / function() more = 0 end * utf_32_le_getbom * Cs(p_utf32_to_utf8_le^0) - -- with a little bit more code we could include the linesplitter +patterns.utf16_to_utf8_be = p_utf16_to_utf8_be +patterns.utf16_to_utf8_le = p_utf16_to_utf8_le +patterns.utf32_to_utf8_be = p_utf32_to_utf8_be +patterns.utf32_to_utf8_le = p_utf32_to_utf8_le - utf16_to_utf8_be = function(t) - if type(t) == "string" then - t = lpegmatch(utf_16_be_linesplitter,t) - end - local result = { } -- we reuse result - for i=1,#t do - local r, more = 0, 0 - for left, right in bytepairs(t[i]) do - if right then - local now = 256*left + right - if more > 0 then - now = (more-0xD800)*0x400 + (now-0xDC00) + 0x10000 -- the 0x10000 smells wrong - more = 0 - r = r + 1 - result[r] = utfchar(now) - elseif now >= 0xD800 and now <= 0xDBFF then - more = now - else - r = r + 1 - result[r] = utfchar(now) - end - end - end - t[i] = concat(result,"",1,r) -- we reused tmp, hence t - end - return t +utf16_to_utf8_be = function(s) + if s and s ~= "" then + return lpegmatch(p_utf16_to_utf8_be,s) + else + return s end +end - utf16_to_utf8_le = function(t) - if type(t) == "string" then - t = lpegmatch(utf_16_le_linesplitter,t) - end - local result = { } -- we reuse result - for i=1,#t do - local r, more = 0, 0 - for left, right in bytepairs(t[i]) do - if right then - local now = 256*right + left - if more > 0 then - now = (more-0xD800)*0x400 + (now-0xDC00) + 0x10000 -- the 0x10000 smells wrong - more = 0 - r = r + 1 - result[r] = utfchar(now) - elseif now >= 0xD800 and now <= 0xDBFF then - more = now - else - r = r + 1 - result[r] = utfchar(now) - end - end - end - t[i] = concat(result,"",1,r) -- we reused tmp, hence t +local utf16_to_utf8_be_t = function(t) + if not t then + return nil + elseif type(t) == "string" then + t = lpegmatch(utf_16_be_linesplitter,t) + end + for i=1,#t do + local s = t[i] + if s ~= "" then + t[i] = lpegmatch(p_utf16_to_utf8_be,s) end - return t end + return t +end - utf32_to_utf8_be = function(t) - if type(t) == "string" then - t = lpegmatch(utflinesplitter,t) - end - local result = { } -- we reuse result - for i=1,#t do - local r, more = 0, -1 - for a,b in bytepairs(t[i]) do - if a and b then - if more < 0 then - more = 256*256*256*a + 256*256*b - else - r = r + 1 - result[t] = utfchar(more + 256*a + b) - more = -1 - end - else - break - end - end - t[i] = concat(result,"",1,r) - end - return t +utf16_to_utf8_le = function(s) + if s and s ~= "" then + return lpegmatch(p_utf16_to_utf8_le,s) + else + return s end +end - utf32_to_utf8_le = function(t) - if type(t) == "string" then - t = lpegmatch(utflinesplitter,t) - end - local result = { } -- we reuse result - for i=1,#t do - local r, more = 0, -1 - for a,b in bytepairs(t[i]) do - if a and b then - if more < 0 then - more = 256*b + a - else - r = r + 1 - result[t] = utfchar(more + 256*256*256*b + 256*256*a) - more = -1 - end - else - break - end - end - t[i] = concat(result,"",1,r) +local utf16_to_utf8_le_t = function(t) + if not t then + return nil + elseif type(t) == "string" then + t = lpegmatch(utf_16_le_linesplitter,t) + end + for i=1,#t do + local s = t[i] + if s ~= "" then + t[i] = lpegmatch(p_utf16_to_utf8_le,s) end - return t end + return t +end -else - - utf16_to_utf8_be = function(t) - if type(t) == "string" then - t = lpegmatch(utf_16_be_linesplitter,t) - end - local result = { } -- we reuse result - for i=1,#t do - local r, more = 0, 0 - for left, right in gmatch(t[i],"(.)(.)") do - if left == "\000" then -- experiment - r = r + 1 - result[r] = utfchar(byte(right)) - elseif right then - local now = 256*byte(left) + byte(right) - if more > 0 then - now = (more-0xD800)*0x400 + (now-0xDC00) + 0x10000 -- the 0x10000 smells wrong - more = 0 - r = r + 1 - result[r] = utfchar(now) - elseif now >= 0xD800 and now <= 0xDBFF then - more = now - else - r = r + 1 - result[r] = utfchar(now) - end - end - end - t[i] = concat(result,"",1,r) -- we reused tmp, hence t - end - return t +utf32_to_utf8_be = function(s) + if s and s ~= "" then + return lpegmatch(p_utf32_to_utf8_be,s) + else + return s end +end - utf16_to_utf8_le = function(t) - if type(t) == "string" then - t = lpegmatch(utf_16_le_linesplitter,t) - end - local result = { } -- we reuse result - for i=1,#t do - local r, more = 0, 0 - for left, right in gmatch(t[i],"(.)(.)") do - if right == "\000" then - r = r + 1 - result[r] = utfchar(byte(left)) - elseif right then - local now = 256*byte(right) + byte(left) - if more > 0 then - now = (more-0xD800)*0x400 + (now-0xDC00) + 0x10000 -- the 0x10000 smells wrong - more = 0 - r = r + 1 - result[r] = utfchar(now) - elseif now >= 0xD800 and now <= 0xDBFF then - more = now - else - r = r + 1 - result[r] = utfchar(now) - end - end - end - t[i] = concat(result,"",1,r) -- we reused tmp, hence t +local utf32_to_utf8_be_t = function(t) + if not t then + return nil + elseif type(t) == "string" then + t = lpegmatch(utf_32_be_linesplitter,t) + end + for i=1,#t do + local s = t[i] + if s ~= "" then + t[i] = lpegmatch(p_utf32_to_utf8_be,s) end - return t end + return t +end - utf32_to_utf8_le = function() return { } end -- never used anyway - utf32_to_utf8_be = function() return { } end -- never used anyway - - -- the next one is slighty slower - - -- local result, lines, r, more = { }, { }, 0, 0 - -- - -- local simple = Cmt( - -- C(1) * C(1), function(str,p,left,right) - -- local now = 256*byte(left) + byte(right) - -- if more > 0 then - -- now = (more-0xD800)*0x400 + (now-0xDC00) + 0x10000 -- the 0x10000 smells wrong - -- more = 0 - -- r = r + 1 - -- result[r] = utfchar(now) - -- elseif now >= 0xD800 and now <= 0xDBFF then - -- more = now - -- else - -- r = r + 1 - -- result[r] = utfchar(now) - -- end - -- return p - -- end - -- ) - -- - -- local complex = Cmt( - -- C(1) * C(1), function(str,p,left,right) - -- local now = 256*byte(left) + byte(right) - -- if more > 0 then - -- now = (more-0xD800)*0x400 + (now-0xDC00) + 0x10000 -- the 0x10000 smells wrong - -- more = 0 - -- r = r + 1 - -- result[r] = utfchar(now) - -- elseif now >= 0xD800 and now <= 0xDBFF then - -- more = now - -- else - -- r = r + 1 - -- result[r] = utfchar(now) - -- end - -- return p - -- end - -- ) - -- - -- local lineend = Cmt ( - -- patterns.utf_16_be_nl, function(str,p) - -- lines[#lines+1] = concat(result,"",1,r) - -- r, more = 0, 0 - -- return p - -- end - -- ) - -- - -- local be_1 = patterns.utfbom_16_be^-1 * (simple + complex)^0 - -- local be_2 = patterns.utfbom_16_be^-1 * (lineend + simple + complex)^0 - -- - -- utf16_to_utf8_be = function(t) - -- if type(t) == "string" then - -- local s = t - -- lines, r, more = { }, 0, 0 - -- lpegmatch(be_2,s) - -- if r > 0 then - -- lines[#lines+1] = concat(result,"",1,r) - -- end - -- result = { } - -- return lines - -- else - -- for i=1,#t do - -- r, more = 0, 0 - -- lpegmatch(be_1,t[i]) - -- t[i] = concat(result,"",1,r) - -- end - -- result = { } - -- return t - -- end - -- end +utf32_to_utf8_le = function(s) + if s and s ~= "" then + return lpegmatch(p_utf32_to_utf8_le,s) + else + return s + end +end +local utf32_to_utf8_le_t = function(t) + if not t then + return nil + elseif type(t) == "string" then + t = lpegmatch(utf_32_le_linesplitter,t) + end + for i=1,#t do + local s = t[i] + if s ~= "" then + t[i] = lpegmatch(p_utf32_to_utf8_le,s) + end + end + return t end -utf.utf16_to_utf8_le = utf16_to_utf8_le -utf.utf16_to_utf8_be = utf16_to_utf8_be -utf.utf32_to_utf8_le = utf32_to_utf8_le -utf.utf32_to_utf8_be = utf32_to_utf8_be +utf.utf16_to_utf8_le_t = utf16_to_utf8_le_t +utf.utf16_to_utf8_be_t = utf16_to_utf8_be_t +utf.utf32_to_utf8_le_t = utf32_to_utf8_le_t +utf.utf32_to_utf8_be_t = utf32_to_utf8_be_t -function utf.utf8_to_utf8(t) +utf.utf16_to_utf8_le = utf16_to_utf8_le +utf.utf16_to_utf8_be = utf16_to_utf8_be +utf.utf32_to_utf8_le = utf32_to_utf8_le +utf.utf32_to_utf8_be = utf32_to_utf8_be + +function utf.utf8_to_utf8_t(t) return type(t) == "string" and lpegmatch(utflinesplitter,t) or t end -function utf.utf16_to_utf8(t,endian) - return endian and utf16_to_utf8_be(t) or utf16_to_utf8_le(t) or t +function utf.utf16_to_utf8_t(t,endian) + return endian and utf16_to_utf8_be_t(t) or utf16_to_utf8_le_t(t) or t end -function utf.utf32_to_utf8(t,endian) - return endian and utf32_to_utf8_be(t) or utf32_to_utf8_le(t) or t +function utf.utf32_to_utf8_t(t,endian) + return endian and utf32_to_utf8_be_t(t) or utf32_to_utf8_le_t(t) or t end -local function little(c) - local b = byte(c) +local function little(b) if b < 0x10000 then return char(b%256,b/256) else @@ -935,8 +1044,7 @@ local function little(c) end end -local function big(c) - local b = byte(c) +local function big(b) if b < 0x10000 then return char(b/256,b%256) else @@ -946,18 +1054,10 @@ local function big(c) end end --- function utf.utf8_to_utf16(str,littleendian) --- if littleendian then --- return char(255,254) .. utfgsub(str,".",little) --- else --- return char(254,255) .. utfgsub(str,".",big) --- end --- end - -local _, l_remap = utf.remapper(little) -local _, b_remap = utf.remapper(big) +local l_remap = Cs((p_utf8byte/little+P(1)/"")^0) +local b_remap = Cs((p_utf8byte/big +P(1)/"")^0) -function utf.utf8_to_utf16_be(str,nobom) +local function utf8_to_utf16_be(str,nobom) if nobom then return lpegmatch(b_remap,str) else @@ -965,7 +1065,7 @@ function utf.utf8_to_utf16_be(str,nobom) end end -function utf.utf8_to_utf16_le(str,nobom) +local function utf8_to_utf16_le(str,nobom) if nobom then return lpegmatch(l_remap,str) else @@ -973,11 +1073,14 @@ function utf.utf8_to_utf16_le(str,nobom) end end +utf.utf8_to_utf16_be = utf8_to_utf16_be +utf.utf8_to_utf16_le = utf8_to_utf16_le + function utf.utf8_to_utf16(str,littleendian,nobom) if littleendian then - return utf.utf8_to_utf16_le(str,nobom) + return utf8_to_utf16_le(str,nobom) else - return utf.utf8_to_utf16_be(str,nobom) + return utf8_to_utf16_be(str,nobom) end end @@ -1008,16 +1111,16 @@ function utf.xstring(s) end function utf.toeight(str) - if not str then + if not str or str == "" then return nil end local utftype = lpegmatch(p_utfstricttype,str) if utftype == "utf-8" then - return sub(str,4) - elseif utftype == "utf-16-le" then - return utf16_to_utf8_le(str) + return sub(str,4) -- remove the bom elseif utftype == "utf-16-be" then - return utf16_to_utf8_ne(str) + return utf16_to_utf8_be(str) -- bom gets removed + elseif utftype == "utf-16-le" then + return utf16_to_utf8_le(str) -- bom gets removed else return str end diff --git a/lualibs-url.lua b/lualibs-url.lua index 7bb7312..b189ec5 100644 --- a/lualibs-url.lua +++ b/lualibs-url.lua @@ -145,19 +145,25 @@ local splitquery = Cf ( Ct("") * P { "sequence", -- hasher local function hashed(str) -- not yet ok (/test?test) - if str == "" then + if not str or str == "" then return { scheme = "invalid", original = str, } end - local s = split(str) - local rawscheme = s[1] - local rawquery = s[4] - local somescheme = rawscheme ~= "" - local somequery = rawquery ~= "" + local detailed = split(str) + local rawscheme = "" + local rawquery = "" + local somescheme = false + local somequery = false + if detailed then + rawscheme = detailed[1] + rawquery = detailed[4] + somescheme = rawscheme ~= "" + somequery = rawquery ~= "" + end if not somescheme and not somequery then - s = { + return { scheme = "file", authority = "", path = str, @@ -167,30 +173,33 @@ local function hashed(str) -- not yet ok (/test?test) noscheme = true, filename = str, } - else -- not always a filename but handy anyway - local authority, path, filename = s[2], s[3] - if authority == "" then - filename = path - elseif path == "" then - filename = "" - else - filename = authority .. "/" .. path - end - s = { - scheme = rawscheme, - authority = authority, - path = path, - query = lpegmatch(unescaper,rawquery), -- unescaped, but possible conflict with & and = - queries = lpegmatch(splitquery,rawquery), -- split first and then unescaped - fragment = s[5], - original = str, - noscheme = false, - filename = filename, - } end - return s + -- not always a filename but handy anyway + local authority = detailed[2] + local path = detailed[3] + local filename = nil + if authority == "" then + filename = path + elseif path == "" then + filename = "" + else + filename = authority .. "/" .. path + end + return { + scheme = rawscheme, + authority = authority, + path = path, + query = lpegmatch(unescaper,rawquery), -- unescaped, but possible conflict with & and = + queries = lpegmatch(splitquery,rawquery), -- split first and then unescaped + fragment = detailed[5], + original = str, + noscheme = false, + filename = filename, + } end +-- inspect(hashed()) +-- inspect(hashed("")) -- inspect(hashed("template:///test")) -- inspect(hashed("template:///test++.whatever")) -- inspect(hashed("template:///test%2B%2B.whatever")) @@ -247,7 +256,7 @@ function url.construct(hash) -- dodo: we need to escape ! return lpegmatch(escaper,concat(fullurl)) end -local pattern = Cs(noslash * R("az","AZ") * (S(":|")/":") * noslash * P(1)^0) +local pattern = Cs(slash^-1/"" * R("az","AZ") * ((S(":|")/":") + P(":")) * slash * P(1)^0) function url.filename(filename) local spec = hashed(filename) @@ -257,6 +266,7 @@ end -- print(url.filename("/c|/test")) -- print(url.filename("/c/test")) +-- print(url.filename("file:///t:/sources/cow.svg")) local function escapestring(str) return lpegmatch(escaper,str) diff --git a/lualibs-util-prs.lua b/lualibs-util-prs.lua index f51f6fc..642968c 100644 --- a/lualibs-util-prs.lua +++ b/lualibs-util-prs.lua @@ -275,15 +275,25 @@ function parsers.array_to_string(a,separator) end end -function parsers.settings_to_set(str,t) -- tohash? -- todo: lpeg -- duplicate anyway - t = t or { } --- for s in gmatch(str,"%s*([^, ]+)") do -- space added - for s in gmatch(str,"[^, ]+") do -- space added - t[s] = true - end - return t +-- function parsers.settings_to_set(str,t) -- tohash? -- todo: lpeg -- duplicate anyway +-- if str then +-- t = t or { } +-- for s in gmatch(str,"[^, ]+") do -- space added +-- t[s] = true +-- end +-- return t +-- else +-- return { } +-- end +-- end + +local pattern = Cf(Ct("") * Cg(C((1-S(", "))^1) * S(", ")^0 * Cc(true))^1,rawset) + +function utilities.parsers.settings_to_set(str,t) + return str and lpegmatch(pattern,str) or { } end + function parsers.simple_hash_to_string(h, separator) local t, tn = { }, 0 for k, v in sortedhash(h) do diff --git a/lualibs-util-sta.lua b/lualibs-util-sta.lua index 1a61ec4..27ab5a6 100644 --- a/lualibs-util-sta.lua +++ b/lualibs-util-sta.lua @@ -81,6 +81,8 @@ end function stacker.new(name) + local report = logs.reporter("stacker",name or nil) + local s local stack = { } @@ -126,8 +128,18 @@ function stacker.new(name) end end - local tops = { } - local top, switch + local tops = { } + local top = nil + local switch = nil + + local function resolve_reset(mode) + if #tops > 0 then + report("resetting %s left-over states of %a",#tops,name) + end + tops = { } + top = nil + switch = nil + end local function resolve_begin(mode) if mode then @@ -206,8 +218,7 @@ function stacker.new(name) local function resolve_end() -- resolve_step(s.unset) - local noftop = #top - if noftop > 0 then + if #tops > 0 then -- was #top brrr local result = s.stop(s,top,1,#top) remove(tops) top = tops[#tops] @@ -224,8 +235,6 @@ function stacker.new(name) resolve_end() end - local report = logs.reporter("stacker",name or nil) - s = { name = name or "unknown", unset = -1, @@ -240,6 +249,7 @@ function stacker.new(name) resolve_begin = resolve_begin, resolve_step = resolve_step, resolve_end = resolve_end, + resolve_reset = resolve_reset, } return s -- we can overload functions diff --git a/lualibs-util-str.lua b/lualibs-util-str.lua index 2739a20..a040b01 100644 --- a/lualibs-util-str.lua +++ b/lualibs-util-str.lua @@ -219,10 +219,12 @@ local striplinepatterns = { ["collapse"] = patterns.collapser, -- how about: stripper fullstripper } +setmetatable(striplinepatterns,{ __index = function(t,k) return p_prune_collapse end }) + strings.striplinepatterns = striplinepatterns function strings.striplines(str,how) - return str and lpegmatch(how and striplinepatterns[how] or p_prune_collapse,str) or str + return str and lpegmatch(striplinepatterns[how],str) or str end -- also see: string.collapsespaces @@ -537,7 +539,7 @@ end -- We could probably use just %s with integers but who knows what Lua 5.3 will do? So let's -- for the moment use %i. -local format_F = function() -- beware, no cast to number +local format_F = function(f) -- beware, no cast to number n = n + 1 if not f or f == "" then return format("(((a%s > -0.0000000005 and a%s < 0.0000000005) and '0') or format((a%s %% 1 == 0) and '%%i' or '%%.9f',a%s))",n,n,n,n) @@ -842,7 +844,7 @@ local builder = Cs { "start", + V("m") + V("M") -- new + V("z") -- new -- - + V("*") -- ignores probably messed up % + -- + V("?") -- ignores probably messed up % ) + V("*") ) @@ -897,6 +899,7 @@ local builder = Cs { "start", ["A"] = (prefix_any * P("A")) / format_A, -- %A => "..." (forces tostring) -- ["*"] = Cs(((1-P("%"))^1 + P("%%")/"%%")^1) / format_rest, -- rest (including %%) + ["?"] = Cs(((1-P("%"))^1 )^1) / format_rest, -- rest (including %%) -- ["!"] = Carg(2) * prefix_any * P("!") * C((1-P("!"))^1) * P("!") / format_extension, } diff --git a/lualibs-util-tab.lua b/lualibs-util-tab.lua index f9e9b31..077c906 100644 --- a/lualibs-util-tab.lua +++ b/lualibs-util-tab.lua @@ -98,6 +98,17 @@ function tables.removevalue(t,value) -- todo: n end end +function tables.replacevalue(t,oldvalue,newvalue) + if oldvalue and newvalue then + for i=1,#t do + if t[i] == oldvalue then + t[i] = newvalue + -- replace all, so no: return + end + end + end +end + function tables.insertbeforevalue(t,value,extra) for i=1,#t do if t[i] == extra then diff --git a/lualibs-util-tpl.lua b/lualibs-util-tpl.lua index 67d0582..bd0e261 100644 --- a/lualibs-util-tpl.lua +++ b/lualibs-util-tpl.lua @@ -52,7 +52,7 @@ local sqlescape = lpeg.replacer { -- { "\t", "\\t" }, } -local sqlquoted = lpeg.Cs(lpeg.Cc("'") * sqlescape * lpeg.Cc("'")) +local sqlquoted = Cs(Cc("'") * sqlescape * Cc("'")) lpegpatterns.sqlescape = sqlescape lpegpatterns.sqlquoted = sqlquoted @@ -111,13 +111,21 @@ local luaescaper = escapers.lua local quotedluaescaper = quotedescapers.lua local function replacekeyunquoted(s,t,how,recurse) -- ".. \" " - local escaper = how and escapers[how] or luaescaper - return escaper(replacekey(s,t,how,recurse)) + if how == false then + return replacekey(s,t,how,recurse) + else + local escaper = how and escapers[how] or luaescaper + return escaper(replacekey(s,t,how,recurse)) + end end local function replacekeyquoted(s,t,how,recurse) -- ".. \" " - local escaper = how and quotedescapers[how] or quotedluaescaper - return escaper(replacekey(s,t,how,recurse)) + if how == false then + return replacekey(s,t,how,recurse) + else + local escaper = how and quotedescapers[how] or quotedluaescaper + return escaper(replacekey(s,t,how,recurse)) + end end local single = P("%") -- test %test% test : resolves test @@ -188,3 +196,5 @@ end -- inspect(utilities.templates.replace("test %one% test", { one = "%two%", two = "two" })) -- inspect(utilities.templates.resolve({ one = "%two%", two = "two", three = "%three%" })) +-- inspect(utilities.templates.replace("test %one% test", { one = "%two%", two = "two" },false,true)) +-- inspect(utilities.templates.replace("test %one% test", { one = "%two%", two = "two" },false)) -- cgit v1.2.3