diff options
| -rw-r--r-- | lualibs-boolean.lua | 6 | ||||
| -rw-r--r-- | lualibs-dir.lua | 94 | ||||
| -rw-r--r-- | lualibs-file.lua | 210 | ||||
| -rw-r--r-- | lualibs-io.lua | 5 | ||||
| -rw-r--r-- | lualibs-lpeg.lua | 329 | ||||
| -rw-r--r-- | lualibs-lua.lua | 45 | ||||
| -rw-r--r-- | lualibs-math.lua | 4 | ||||
| -rw-r--r-- | lualibs-md5.lua | 64 | ||||
| -rw-r--r-- | lualibs-os.lua | 16 | ||||
| -rw-r--r-- | lualibs-string.lua | 9 | ||||
| -rw-r--r-- | lualibs-table.lua | 306 | ||||
| -rw-r--r-- | lualibs-trac-inf.lua | 14 | ||||
| -rw-r--r-- | lualibs-unicode.lua | 675 | ||||
| -rw-r--r-- | lualibs-url.lua | 68 | ||||
| -rw-r--r-- | lualibs-util-dim.lua | 4 | ||||
| -rw-r--r-- | lualibs-util-prs.lua | 124 | ||||
| -rw-r--r-- | lualibs-util-sta.lua | 22 | ||||
| -rw-r--r-- | lualibs-util-sto.lua | 21 | ||||
| -rw-r--r-- | lualibs-util-str.lua | 54 | ||||
| -rw-r--r-- | lualibs-util-tab.lua | 144 | ||||
| -rw-r--r-- | lualibs-util-tpl.lua | 41 | ||||
| -rw-r--r-- | lualibs.dtx | 11 | 
22 files changed, 1445 insertions, 821 deletions
diff --git a/lualibs-boolean.lua b/lualibs-boolean.lua index 8d11080..8f18d4c 100644 --- a/lualibs-boolean.lua +++ b/lualibs-boolean.lua @@ -57,11 +57,11 @@ function string.booleanstring(str)      end  end -function string.is_boolean(str,default) +function string.is_boolean(str,default,strict)      if type(str) == "string" then -        if str == "true" or str == "yes" or str == "on" or str == "t" or str == "1" then +        if str == "true" or str == "yes" or str == "on" or str == "t" or (not strict and str == "1") then              return true -        elseif str == "false" or str == "no" or str == "off" or str == "f" or str == "0" then +        elseif str == "false" or str == "no" or str == "off" or str == "f" or (not strict and str == "0") then              return false          end      end diff --git a/lualibs-dir.lua b/lualibs-dir.lua index bcf28d0..81ac65e 100644 --- a/lualibs-dir.lua +++ b/lualibs-dir.lua @@ -6,7 +6,8 @@ if not modules then modules = { } end modules ['l-dir'] = {      license   = "see context related readme files"  } --- dir.expandname will be merged with cleanpath and collapsepath +-- todo: dir.expandname will be sped up and merged with cleanpath and collapsepath +-- todo: keep track of currentdir (chdir, pushdir, popdir)  local type, select = type, select  local find, gmatch, match, gsub, sub = string.find, string.gmatch, string.match, string.gsub, string.sub @@ -156,7 +157,7 @@ end  local function globpattern(path,patt,recurse,method)      local kind = type(method) -    if pattern and sub(patt,1,-3) == path then +    if patt and sub(patt,1,-3) == path then          patt = false      end      if kind == "function" then @@ -209,7 +210,7 @@ end  dir.collectpattern = collectpattern -local separator +local separator, pattern  if onwindows then -- we could sanitize here @@ -490,52 +491,63 @@ end  dir.makedirs = dir.mkdirs --- we can only define it here as it uses dir.current -if onwindows then +do -    function dir.expandname(str) -- will be merged with cleanpath and collapsepath\ -        local first, nothing, last = match(str,"^(//)(//*)(.*)$") -        if first then -            first = dir.current() .. "/" -- dir.current sanitizes -        end -        if not first then -            first, last = match(str,"^(//)/*(.*)$") -        end -        if not first then -            first, last = match(str,"^([a-zA-Z]:)(.*)$") -            if first and not find(last,"^/") then -                local d = currentdir() -                if chdir(first) then -                    first = dir.current() +    -- we can only define it here as it uses dir.chdir and we also need to +    -- make sure we use the non sandboxed variant because otherwise we get +    -- into a recursive loop due to usage of expandname in the file resolver + +    local chdir = sandbox and sandbox.original(chdir) or chdir + +    if onwindows then + +        local xcurrentdir = dir.current + +        function dir.expandname(str) -- will be merged with cleanpath and collapsepath\ +            local first, nothing, last = match(str,"^(//)(//*)(.*)$") +            if first then +                first = xcurrentdir() .. "/" -- xcurrentdir sanitizes +            end +            if not first then +                first, last = match(str,"^(//)/*(.*)$") +            end +            if not first then +                first, last = match(str,"^([a-zA-Z]:)(.*)$") +                if first and not find(last,"^/") then +                    local d = currentdir() -- push / pop +                    if chdir(first) then +                        first = xcurrentdir() -- xcurrentdir sanitizes +                    end +                    chdir(d)                  end -                chdir(d) +            end +            if not first then +                first, last = xcurrentdir(), str +            end +            last = gsub(last,"//","/") +            last = gsub(last,"/%./","/") +            last = gsub(last,"^/*","") +            first = gsub(first,"/*$","") +            if last == "" or last == "." then +                return first +            else +                return first .. "/" .. last              end          end -        if not first then -            first, last = dir.current(), str -        end -        last = gsub(last,"//","/") -        last = gsub(last,"/%./","/") -        last = gsub(last,"^/*","") -        first = gsub(first,"/*$","") -        if last == "" or last == "." then -            return first -        else -            return first .. "/" .. last -        end -    end -else +    else -    function dir.expandname(str) -- will be merged with cleanpath and collapsepath -        if not find(str,"^/") then -            str = currentdir() .. "/" .. str +        function dir.expandname(str) -- will be merged with cleanpath and collapsepath +            if not find(str,"^/") then +                str = currentdir() .. "/" .. str +            end +            str = gsub(str,"//","/") +            str = gsub(str,"/%./","/") +            str = gsub(str,"(.)/%.$","%1") +            return str          end -        str = gsub(str,"//","/") -        str = gsub(str,"/%./","/") -        str = gsub(str,"(.)/%.$","%1") -        return str +      end  end diff --git a/lualibs-file.lua b/lualibs-file.lua index c05372a..7ed6370 100644 --- a/lualibs-file.lua +++ b/lualibs-file.lua @@ -15,51 +15,53 @@ if not lfs then      lfs = optionalrequire("lfs")  end -if not lfs then - -    lfs = { -        getcurrentdir = function() -            return "." -        end, -        attributes = function() -            return nil -        end, -        isfile = function(name) -            local f = io.open(name,'rb') -            if f then -                f:close() -                return true -            end -        end, -        isdir = function(name) -            print("you need to load lfs") -            return false -        end -    } - -elseif not lfs.isfile then - -    local attributes = lfs.attributes - -    function lfs.isdir(name) -        return attributes(name,"mode") == "directory" -    end - -    function lfs.isfile(name) -        return attributes(name,"mode") == "file" -    end - - -- function lfs.isdir(name) - --     local a = attributes(name) - --     return a and a.mode == "directory" - -- end - - -- function lfs.isfile(name) - --     local a = attributes(name) - --     return a and a.mode == "file" - -- end - -end +-- -- see later +-- +-- if not lfs then +-- +--     lfs = { +--         getcurrentdir = function() +--             return "." +--         end, +--         attributes = function() +--             return nil +--         end, +--         isfile = function(name) +--             local f = io.open(name,'rb') +--             if f then +--                 f:close() +--                 return true +--             end +--         end, +--         isdir = function(name) +--             print("you need to load lfs") +--             return false +--         end +--     } +-- +-- elseif not lfs.isfile then +-- +--     local attributes = lfs.attributes +-- +--     function lfs.isdir(name) +--         return attributes(name,"mode") == "directory" +--     end +-- +--     function lfs.isfile(name) +--         return attributes(name,"mode") == "file" +--     end +-- +--  -- function lfs.isdir(name) +--  --     local a = attributes(name) +--  --     return a and a.mode == "directory" +--  -- end +-- +--  -- function lfs.isfile(name) +--  --     local a = attributes(name) +--  --     return a and a.mode == "file" +--  -- end +-- +-- end  local insert, concat = table.insert, table.concat  local match, find, gmatch = string.match, string.find, string.gmatch @@ -72,6 +74,28 @@ local checkedsplit = string.checkedsplit  local P, R, S, C, Cs, Cp, Cc, Ct = lpeg.P, lpeg.R, lpeg.S, lpeg.C, lpeg.Cs, lpeg.Cp, lpeg.Cc, lpeg.Ct +-- better this way: + +local tricky     = S("/\\") * P(-1) +local attributes = lfs.attributes + +if sandbox then +    sandbox.redefine(lfs.isfile,"lfs.isfile") +    sandbox.redefine(lfs.isdir, "lfs.isdir") +end + +function lfs.isdir(name) +    if lpegmatch(tricky,name) then +        return attributes(name,"mode") == "directory" +    else +        return attributes(name.."/.","mode") == "directory" +    end +end + +function lfs.isfile(name) +    return attributes(name,"mode") == "file" +end +  local colon     = P(":")  local period    = P(".")  local periods   = P("..") @@ -133,8 +157,8 @@ file.suffix       = suffixonly  file.suffixesonly = suffixesonly  file.suffixes     = suffixesonly -file.dirname    = pathpart   -- obsolete -file.extname    = suffixonly -- obsolete +file.dirname      = pathpart   -- obsolete +file.extname      = suffixonly -- obsolete  -- actually these are schemes @@ -385,31 +409,90 @@ local deslasher = lpeg.replacer(S("\\/")^1,"/")  -- then we still have to deal with urls ... anyhow, multiple // are never a real  -- problem but just ugly. -function file.join(...) -    local lst = { ... } -    local one = lst[1] +-- function file.join(...) +--     local lst = { ... } +--     local one = lst[1] +--     if lpegmatch(isnetwork,one) then +--         local one = lpegmatch(reslasher,one) +--         local two = lpegmatch(deslasher,concat(lst,"/",2)) +--         if lpegmatch(hasroot,two) then +--             return one .. two +--         else +--             return one .. "/" .. two +--         end +--     elseif lpegmatch(isroot,one) then +--         local two = lpegmatch(deslasher,concat(lst,"/",2)) +--         if lpegmatch(hasroot,two) then +--             return two +--         else +--             return "/" .. two +--         end +--     elseif one == "" then +--         return lpegmatch(stripper,concat(lst,"/",2)) +--     else +--         return lpegmatch(deslasher,concat(lst,"/")) +--     end +-- end + +function file.join(one, two, three, ...) +    if not two then +        return one == "" and one or lpegmatch(stripper,one) +    end +    if one == "" then +        return lpegmatch(stripper,three and concat({ two, three, ... },"/") or two) +    end      if lpegmatch(isnetwork,one) then          local one = lpegmatch(reslasher,one) -        local two = lpegmatch(deslasher,concat(lst,"/",2)) +        local two = lpegmatch(deslasher,three and concat({ two, three, ... },"/") or two)          if lpegmatch(hasroot,two) then              return one .. two          else              return one .. "/" .. two          end      elseif lpegmatch(isroot,one) then -        local two = lpegmatch(deslasher,concat(lst,"/",2)) +        local two = lpegmatch(deslasher,three and concat({ two, three, ... },"/") or two)          if lpegmatch(hasroot,two) then              return two          else              return "/" .. two          end -    elseif one == "" then -        return lpegmatch(stripper,concat(lst,"/",2))      else -        return lpegmatch(deslasher,concat(lst,"/")) +        return lpegmatch(deslasher,concat({  one, two, three, ... },"/"))      end  end +-- or we can use this: +-- +-- function file.join(...) +--     local n = select("#",...) +--     local one = select(1,...) +--     if n == 1 then +--         return one == "" and one or lpegmatch(stripper,one) +--     end +--     if one == "" then +--         return lpegmatch(stripper,n > 2 and concat({ ... },"/",2) or select(2,...)) +--     end +--     if lpegmatch(isnetwork,one) then +--         local one = lpegmatch(reslasher,one) +--         local two = lpegmatch(deslasher,n > 2 and concat({ ... },"/",2) or select(2,...)) +--         if lpegmatch(hasroot,two) then +--             return one .. two +--         else +--             return one .. "/" .. two +--         end +--     elseif lpegmatch(isroot,one) then +--         local two = lpegmatch(deslasher,n > 2 and concat({ ... },"/",2) or select(2,...)) +--         if lpegmatch(hasroot,two) then +--             return two +--         else +--             return "/" .. two +--         end +--     else +--         return lpegmatch(deslasher,concat({ ... },"/")) +--     end +-- end + +-- print(file.join("c:/whatever"))  -- print(file.join("c:/whatever","name"))  -- print(file.join("//","/y"))  -- print(file.join("/","/y")) @@ -495,23 +578,6 @@ function file.collapsepath(str,anchor) -- anchor: false|nil, true, "."      end  end --- better this way: - -local tricky     = S("/\\") * P(-1) -local attributes = lfs.attributes - -function lfs.isdir(name) -    if lpegmatch(tricky,name) then -        return attributes(name,"mode") == "directory" -    else -        return attributes(name.."/.","mode") == "directory" -    end -end - -function lfs.isfile(name) -    return attributes(name,"mode") == "file" -end -  -- local function test(str,...)  --    print(string.format("%-20s %-15s %-30s %-20s",str,file.collapsepath(str),file.collapsepath(str,true),file.collapsepath(str,".")))  -- end diff --git a/lualibs-io.lua b/lualibs-io.lua index 020e811..a91d44d 100644 --- a/lualibs-io.lua +++ b/lualibs-io.lua @@ -339,11 +339,6 @@ function io.readstring(f,n,m)      return str  end --- - -if not io.i_limiter then function io.i_limiter() end end -- dummy so we can test safely -if not io.o_limiter then function io.o_limiter() end end -- dummy so we can test safely -  -- This works quite ok:  --  -- function io.piped(command,writer) diff --git a/lualibs-lpeg.lua b/lualibs-lpeg.lua index f3fd28b..5be1246 100644 --- a/lualibs-lpeg.lua +++ b/lualibs-lpeg.lua @@ -10,6 +10,8 @@ if not modules then modules = { } end modules ['l-lpeg'] = {  -- if i can use new features like capture / 2 and .B (at first sight the xml  -- parser is some 5% slower) +-- lpeg.P("abc") is faster than lpeg.P("a") * lpeg.P("b") * lpeg.P("c") +  -- a new lpeg fails on a #(1-P(":")) test and really needs a + P(-1)  -- move utf    -> l-unicode @@ -19,7 +21,7 @@ lpeg = require("lpeg")  -- The latest lpeg doesn't have print any more, and even the new ones are not  -- available by default (only when debug mode is enabled), which is a pitty as --- as it helps nailign down bottlenecks. Performance seems comparable: some 10% +-- as it helps nailing down bottlenecks. Performance seems comparable: some 10%  -- slower pattern compilation, same parsing speed, although,  --  -- local p = lpeg.C(lpeg.P(1)^0 * lpeg.P(-1)) @@ -80,7 +82,7 @@ local lpegtype, lpegmatch, lpegprint = lpeg.type, lpeg.match, lpeg.print  -- let's start with an inspector:  if setinspector then -    setinspector(function(v) if lpegtype(v) then lpegprint(v) return true end end) +    setinspector("lpeg",function(v) if lpegtype(v) then lpegprint(v) return true end end)  end  -- Beware, we predefine a bunch of patterns here and one reason for doing so @@ -145,6 +147,9 @@ patterns.utfbom_8      = utfbom_8  patterns.utf_16_be_nl  = P("\000\r\000\n") + P("\000\r") + P("\000\n") -- P("\000\r") * (P("\000\n") + P(true)) + P("\000\n")  patterns.utf_16_le_nl  = P("\r\000\n\000") + P("\r\000") + P("\n\000") -- P("\r\000") * (P("\n\000") + P(true)) + P("\n\000") +patterns.utf_32_be_nl  = P("\000\000\000\r\000\000\000\n") + P("\000\000\000\r") + P("\000\000\000\n") +patterns.utf_32_le_nl  = P("\r\000\000\000\n\000\000\000") + P("\r\000\000\000") + P("\n\000\000\000") +  patterns.utf8one       = R("\000\127")  patterns.utf8two       = R("\194\223") * utf8next  patterns.utf8three     = R("\224\239") * utf8next * utf8next @@ -183,10 +188,26 @@ local fullstripper     = whitespace^0 * C((whitespace^0 * nonwhitespace^1)^0)  ----- collapser        = Cs(spacer^0/"" * ((spacer^1 * endofstring / "") + (spacer^1/" ") + P(1))^0)  local collapser        = Cs(spacer^0/"" * nonspacer^0 * ((spacer^0/" " * nonspacer^1)^0)) +local b_collapser      = Cs( whitespace^0        /"" * (nonwhitespace^1 + whitespace^1/" ")^0) +local e_collapser      = Cs((whitespace^1 * P(-1)/"" +  nonwhitespace^1 + whitespace^1/" ")^0) +local m_collapser      = Cs(                           (nonwhitespace^1 + whitespace^1/" ")^0) + +local b_stripper      = Cs( spacer^0        /"" * (nonspacer^1 + spacer^1/" ")^0) +local e_stripper      = Cs((spacer^1 * P(-1)/"" +  nonspacer^1 + spacer^1/" ")^0) +local m_stripper      = Cs(                       (nonspacer^1 + spacer^1/" ")^0) +  patterns.stripper      = stripper  patterns.fullstripper  = fullstripper  patterns.collapser     = collapser +patterns.b_collapser   = b_collapser +patterns.m_collapser   = m_collapser +patterns.e_collapser   = e_collapser + +patterns.b_stripper    = b_stripper +patterns.m_stripper    = m_stripper +patterns.e_stripper    = e_stripper +  patterns.lowercase     = lowercase  patterns.uppercase     = uppercase  patterns.letter        = patterns.lowercase + patterns.uppercase @@ -815,121 +836,185 @@ end  -- experiment: --- local function make(t) ---     local p ---     local keys = sortedkeys(t) ---     for i=1,#keys do ---         local k = keys[i] ---         local v = t[k] ---         if not p then ---             if next(v) then ---                 p = P(k) * make(v) ---             else ---                 p = P(k) ---             end ---         else ---             if next(v) then ---                 p = p + P(k) * make(v) ---             else ---                 p = p + P(k) ---             end ---         end ---     end ---     return p --- end - --- local function make(t) ---     local p = P(false) ---     local keys = sortedkeys(t) ---     for i=1,#keys do ---         local k = keys[i] ---         local v = t[k] ---         if next(v) then ---             p = p + P(k) * make(v) ---         else ---             p = p + P(k) ---         end ---     end ---     return p --- end - --- function lpeg.utfchartabletopattern(list) -- goes to util-lpg ---     local tree = { } ---     for i=1,#list do ---         local t = tree ---         for c in gmatch(list[i],".") do ---             local tc = t[c] ---             if not tc then ---                 tc = { } ---                 t[c] = tc ---             end ---             t = tc ---         end ---     end ---     return make(tree) --- end +local p_false = P(false) +local p_true  = P(true) -local function make(t,hash) -    local p = P(false) +local function make(t) +    local function making(t) +        local p    = p_false +        local keys = sortedkeys(t) +        for i=1,#keys do +            local k = keys[i] +            if k ~= "" then +                local v = t[k] +                if v == true then +                    p = p + P(k) * p_true +                elseif v == false then +                    -- can't happen +                else +                    p = p + P(k) * making(v) +                end +            end +        end +        if t[""] then +            p = p + p_true +        end +        return p +    end +    local p    = p_false      local keys = sortedkeys(t)      for i=1,#keys do          local k = keys[i] -        local v = t[k] -        local h = hash[v] -        if h then -            if next(v) then -                p = p + P(k) * (make(v,hash) + P(true)) +        if k ~= "" then +            local v = t[k] +            if v == true then +                p = p + P(k) * p_true +            elseif v == false then +                -- can't happen              else -                p = p + P(k) * P(true) +                p = p + P(k) * making(v)              end -        else -            if next(v) then -                p = p + P(k) * make(v,hash) +        end +    end +    return p +end + +local function collapse(t,x) +    if type(t) ~= "table" then +        return t, x +    else +        local n = next(t) +        if n == nil then +            return t, x +        elseif next(t,n) == nil then +            -- one entry +            local k = n +            local v = t[k] +            if type(v) == "table" then +                return collapse(v,x..k)              else -                p = p + P(k) +                return v, x .. k              end +        else +            local tt = { } +            for k, v in next, t do +                local vv, kk = collapse(v,k) +                tt[kk] = vv +            end +            return tt, x          end      end -    return p  end  function lpeg.utfchartabletopattern(list) -- goes to util-lpg      local tree = { } -    local hash = { }      local n = #list      if n == 0 then -        -- we could always use this branch          for s in next, list do              local t = tree +            local p, pk              for c in gmatch(s,".") do -                local tc = t[c] -                if not tc then -                    tc = { } -                    t[c] = tc +                if t == true then +                    t = { [c] = true, [""] = true } +                    p[pk] = t +                    p = t +                    t = false +                elseif t == false then +                    t = { [c] = false } +                    p[pk] = t +                    p = t +                    t = false +                else +                    local tc = t[c] +                    if not tc then +                        tc = false +                        t[c] = false +                    end +                    p = t +                    t = tc                  end -                t = tc +                pk = c +            end +            if t == false then +                p[pk] = true +            elseif t == true then +                -- okay +            else +                t[""] = true              end -            hash[t] = s          end      else          for i=1,n do -            local t = tree              local s = list[i] +            local t = tree +            local p, pk              for c in gmatch(s,".") do -                local tc = t[c] -                if not tc then -                    tc = { } -                    t[c] = tc +                if t == true then +                    t = { [c] = true, [""] = true } +                    p[pk] = t +                    p = t +                    t = false +                elseif t == false then +                    t = { [c] = false } +                    p[pk] = t +                    p = t +                    t = false +                else +                    local tc = t[c] +                    if not tc then +                        tc = false +                        t[c] = false +                    end +                    p = t +                    t = tc                  end -                t = tc +                pk = c +            end +            if t == false then +                p[pk] = true +            elseif t == true then +                -- okay +            else +                t[""] = true              end -            hash[t] = s          end      end -    return make(tree,hash) +--     collapse(tree,"") -- needs testing, maybe optional, slightly faster because P("x")*P("X") seems slower than P"(xX") (why) +--     inspect(tree) +    return make(tree)  end --- inspect ( lpeg.utfchartabletopattern { +-- local t = { "start", "stoep", "staart", "paard" } +-- local p = lpeg.Cs((lpeg.utfchartabletopattern(t)/string.upper + 1)^1) + +-- local t = { "a", "abc", "ac", "abe", "abxyz", "xy", "bef","aa" } +-- local p = lpeg.Cs((lpeg.utfchartabletopattern(t)/string.upper + 1)^1) + +-- inspect(lpegmatch(p,"a")) +-- inspect(lpegmatch(p,"aa")) +-- inspect(lpegmatch(p,"aaaa")) +-- inspect(lpegmatch(p,"ac")) +-- inspect(lpegmatch(p,"bc")) +-- inspect(lpegmatch(p,"zzbczz")) +-- inspect(lpegmatch(p,"zzabezz")) +-- inspect(lpegmatch(p,"ab")) +-- inspect(lpegmatch(p,"abc")) +-- inspect(lpegmatch(p,"abe")) +-- inspect(lpegmatch(p,"xa")) +-- inspect(lpegmatch(p,"bx")) +-- inspect(lpegmatch(p,"bax")) +-- inspect(lpegmatch(p,"abxyz")) +-- inspect(lpegmatch(p,"foobarbefcrap")) + +-- local t = { ["^"] = 1, ["^^"] = 2, ["^^^"] = 3, ["^^^^"] = 4 } +-- local p = lpeg.Cs((lpeg.utfchartabletopattern(t)/t + 1)^1) +-- inspect(lpegmatch(p," ^ ^^ ^^^ ^^^^ ^^^^^ ^^^^^^ ^^^^^^^ ")) + +-- local t = { ["^^"] = 2, ["^^^"] = 3, ["^^^^"] = 4 } +-- local p = lpeg.Cs((lpeg.utfchartabletopattern(t)/t + 1)^1) +-- inspect(lpegmatch(p," ^ ^^ ^^^ ^^^^ ^^^^^ ^^^^^^ ^^^^^^^ ")) + +-- lpeg.utfchartabletopattern {  --     utfchar(0x00A0), -- nbsp  --     utfchar(0x2000), -- enquad  --     utfchar(0x2001), -- emquad @@ -945,7 +1030,7 @@ end  --     utfchar(0x200B), -- zerowidthspace  --     utfchar(0x202F), -- narrownobreakspace  --     utfchar(0x205F), -- math thinspace --- } ) +-- }  -- a few handy ones:  -- @@ -1014,3 +1099,75 @@ lpeg.patterns.stripzeros = stripper  -- lpegmatch(stripper,str)  -- print(#str, os.clock()-ts, lpegmatch(stripper,sample)) +-- for practical reasone we keep this here: + +local byte_to_HEX = { } +local byte_to_hex = { } +local byte_to_dec = { } -- for md5 +local hex_to_byte = { } + +for i=0,255 do +    local H = format("%02X",i) +    local h = format("%02x",i) +    local d = format("%03i",i) +    local c = char(i) +    byte_to_HEX[c] = H +    byte_to_hex[c] = h +    byte_to_dec[c] = d +    hex_to_byte[h] = c +    hex_to_byte[H] = c +end + +local hextobyte  = P(2)/hex_to_byte +local bytetoHEX  = P(1)/byte_to_HEX +local bytetohex  = P(1)/byte_to_hex +local bytetodec  = P(1)/byte_to_dec +local hextobytes = Cs(hextobyte^0) +local bytestoHEX = Cs(bytetoHEX^0) +local bytestohex = Cs(bytetohex^0) +local bytestodec = Cs(bytetodec^0) + +patterns.hextobyte  = hextobyte +patterns.bytetoHEX  = bytetoHEX +patterns.bytetohex  = bytetohex +patterns.bytetodec  = bytetodec +patterns.hextobytes = hextobytes +patterns.bytestoHEX = bytestoHEX +patterns.bytestohex = bytestohex +patterns.bytestodec = bytestodec + +function string.toHEX(s) +    if not s or s == "" then +        return s +    else +        return lpegmatch(bytestoHEX,s) +    end +end + +function string.tohex(s) +    if not s or s == "" then +        return s +    else +        return lpegmatch(bytestohex,s) +    end +end + +function string.todec(s) +    if not s or s == "" then +        return s +    else +        return lpegmatch(bytestodec,s) +    end +end + +function string.tobytes(s) +    if not s or s == "" then +        return s +    else +        return lpegmatch(hextobytes,s) +    end +end + +-- local h = "ADFE0345" +-- local b = lpegmatch(patterns.hextobytes,h) +-- print(h,b,string.tohex(b),string.toHEX(b)) diff --git a/lualibs-lua.lua b/lualibs-lua.lua index 9565f48..cb61829 100644 --- a/lualibs-lua.lua +++ b/lualibs-lua.lua @@ -129,22 +129,36 @@ local print, select, tostring = print, select, tostring  local inspectors = { } -function setinspector(inspector) -- global function -    inspectors[#inspectors+1] = inspector +function setinspector(kind,inspector) -- global function +    inspectors[kind] = inspector  end  function inspect(...) -- global function      for s=1,select("#",...) do          local value = select(s,...) -        local done = false -        for i=1,#inspectors do -            done = inspectors[i](value) -            if done then -                break +        if value == nil then +            print("nil") +        else +            local done  = false +            -- type driven (table) +            local kind      = type(value) +            local inspector = inspectors[kind] +            if inspector then +                done = inspector(value) +                if done then +                    break +                end +            end +            -- whatever driven (token, node, ...) +            for kind, inspector in next, inspectors do +                done = inspector(value) +                if done then +                    break +                end +            end +            if not done then +                print(tostring(value))              end -        end -        if not done then -            print(tostring(value))          end      end  end @@ -165,3 +179,14 @@ end  if lua then      lua.mask = load([[τεχ = 1]]) and "utf" or "ascii"  end + +local flush   = io.flush + +if flush then + +    local execute = os.execute if execute then function os.execute(...) flush() return execute(...) end end +    local exec    = os.exec    if exec    then function os.exec   (...) flush() return exec   (...) end end +    local spawn   = os.spawn   if spawn   then function os.spawn  (...) flush() return spawn  (...) end end +    local popen   = io.popen   if popen   then function io.popen  (...) flush() return popen  (...) end end + +end diff --git a/lualibs-math.lua b/lualibs-math.lua index 43f60b5..ec62919 100644 --- a/lualibs-math.lua +++ b/lualibs-math.lua @@ -8,6 +8,10 @@ if not modules then modules = { } end modules ['l-math'] = {  local floor, sin, cos, tan = math.floor, math.sin, math.cos, math.tan +if not math.ceiling then +    math.ceiling = math.ceil +end +  if not math.round then      function math.round(x) return floor(x + 0.5) end  end diff --git a/lualibs-md5.lua b/lualibs-md5.lua index 8ac20a5..00272c8 100644 --- a/lualibs-md5.lua +++ b/lualibs-md5.lua @@ -19,48 +19,38 @@ if not md5 then  end  local md5, file = md5, file -local gsub, format, byte = string.gsub, string.format, string.byte -local md5sum = md5.sum +local gsub = string.gsub -local function convert(str,fmt) -    return (gsub(md5sum(str),".",function(chr) return format(fmt,byte(chr)) end)) -end - -if not md5.HEX then function md5.HEX(str) return convert(str,"%02X") end end -if not md5.hex then function md5.hex(str) return convert(str,"%02x") end end -if not md5.dec then function md5.dec(str) return convert(str,"%03i") end end - --- local P, Cs, lpegmatch = lpeg.P, lpeg.Cs,lpeg.match --- --- if not md5.HEX then ---     local function remap(chr) return format("%02X",byte(chr)) end ---     function md5.HEX(str) return (gsub(md5.sum(str),".",remap)) end --- end +-- local gsub, format, byte = string.gsub, string.format, string.byte  -- --- if not md5.hex then ---     local function remap(chr) return format("%02x",byte(chr)) end ---     function md5.hex(str) return (gsub(md5.sum(str),".",remap)) end +-- local function convert(str,fmt) +--     return (gsub(md5sum(str),".",function(chr) return format(fmt,byte(chr)) end))  -- end  -- --- if not md5.dec then ---     local function remap(chr) return format("%03i",byte(chr)) end ---     function md5.dec(str) return (gsub(md5.sum(str),".",remap)) end --- end +-- if not md5.HEX then function md5.HEX(str) return convert(str,"%02X") end end +-- if not md5.hex then function md5.hex(str) return convert(str,"%02x") end end +-- if not md5.dec then function md5.dec(str) return convert(str,"%03i") end end --- if not md5.HEX then ---     local pattern_HEX = Cs( ( P(1) / function(chr) return format("%02X",byte(chr)) end)^0 ) ---     function md5.HEX(str) return lpegmatch(pattern_HEX,md5.sum(str)) end --- end --- --- if not md5.hex then ---     local pattern_hex = Cs( ( P(1) / function(chr) return format("%02x",byte(chr)) end)^0 ) ---     function md5.hex(str) return lpegmatch(pattern_hex,md5.sum(str)) end --- end --- --- if not md5.dec then ---     local pattern_dec = Cs( ( P(1) / function(chr) return format("%02i",byte(chr)) end)^0 ) ---     function md5.dec(str) return lpegmatch(pattern_dec,md5.sum(str)) end --- end +do + +    local patterns = lpeg and lpeg.patterns + +    if patterns then + +        local bytestoHEX = patterns.bytestoHEX +        local bytestohex = patterns.bytestohex +        local bytestodec = patterns.bytestodec + +        local lpegmatch = lpeg.match +        local md5sum    = md5.sum + +        if not md5.HEX then function md5.HEX(str) if str then return lpegmatch(bytestoHEX,md5sum(str)) end end end +        if not md5.hex then function md5.hex(str) if str then return lpegmatch(bytestohex,md5sum(str)) end end end +        if not md5.dec then function md5.dec(str) if str then return lpegmatch(bytestodec,md5sum(str)) end end end + +    end + +end  function file.needsupdating(oldname,newname,threshold) -- size modification access change      local oldtime = lfs.attributes(oldname,"modification") diff --git a/lualibs-os.lua b/lualibs-os.lua index 1dff79c..0a86ea6 100644 --- a/lualibs-os.lua +++ b/lualibs-os.lua @@ -25,8 +25,6 @@ if not modules then modules = { } end modules ['l-os'] = {  -- os.sleep() => socket.sleep()  -- math.randomseed(tonumber(string.sub(string.reverse(tostring(math.floor(socket.gettime()*10000))),1,6))) --- maybe build io.flush in os.execute -  local os = os  local date, time = os.date, os.time  local find, format, gsub, upper, gmatch = string.find, string.format, string.gsub, string.upper, string.gmatch @@ -118,15 +116,11 @@ end  -- end of environment hack -local execute, spawn, exec, iopopen, ioflush = os.execute, os.spawn or os.execute, os.exec or os.execute, io.popen, io.flush - -function os.execute(...) ioflush() return execute(...) end -function os.spawn  (...) ioflush() return spawn  (...) end -function os.exec   (...) ioflush() return exec   (...) end -function io.popen  (...) ioflush() return iopopen(...) end +local execute = os.execute +local iopopen = io.popen  function os.resultof(command) -    local handle = io.popen(command,"r") +    local handle = iopopen(command,"r") -- already has flush      if handle then          local result = handle:read("*all") or ""          handle:close() @@ -160,7 +154,7 @@ local launchers = {  }  function os.launch(str) -    os.execute(format(launchers[os.name] or launchers.unix,str)) +    execute(format(launchers[os.name] or launchers.unix,str))  end  if not os.times then -- ? @@ -355,6 +349,8 @@ else  end +os.newline = name == "windows" and "\013\010" or "\010" -- crlf or lf +  function resolvers.bits(t,k)      local bits = find(os.platform,"64",1,true) and 64 or 32      os.bits = bits diff --git a/lualibs-string.lua b/lualibs-string.lua index 3b1a000..e9dc2bb 100644 --- a/lualibs-string.lua +++ b/lualibs-string.lua @@ -94,7 +94,7 @@ end  --     return not find(str,"%S")  -- end -local pattern = P(" ")^0 * P(-1) +local pattern = P(" ")^0 * P(-1) -- maybe also newlines  -- patterns.onlyspaces = pattern @@ -192,10 +192,11 @@ string.itself  = function(s) return s end  -- also handy (see utf variant) -local pattern = Ct(C(1)^0) -- string and not utf ! +local pattern_c = Ct( C(1)      ^0) -- string and not utf ! +local pattern_b = Ct((C(1)/byte)^0) -function string.totable(str) -    return lpegmatch(pattern,str) +function string.totable(str,bytes) +    return lpegmatch(bytes and pattern_b or pattern_c,str)  end  -- handy from within tex: diff --git a/lualibs-table.lua b/lualibs-table.lua index e642106..552097e 100644 --- a/lualibs-table.lua +++ b/lualibs-table.lua @@ -39,7 +39,7 @@ end  function table.keys(t)      if t then          local keys, k = { }, 0 -        for key, _ in next, t do +        for key in next, t do              k = k + 1              keys[k] = key          end @@ -49,35 +49,127 @@ function table.keys(t)      end  end +-- local function compare(a,b) +--     local ta = type(a) -- needed, else 11 < 2 +--     local tb = type(b) -- needed, else 11 < 2 +--     if ta == tb and ta == "number" then +--         return a < b +--     else +--         return tostring(a) < tostring(b) -- not that efficient +--     end +-- end + +-- local function compare(a,b) +--     local ta = type(a) -- needed, else 11 < 2 +--     local tb = type(b) -- needed, else 11 < 2 +--     if ta == tb and (ta == "number" or ta == "string") then +--         return a < b +--     else +--         return tostring(a) < tostring(b) -- not that efficient +--     end +-- end + +-- local function sortedkeys(tab) +--     if tab then +--         local srt, category, s = { }, 0, 0 -- 0=unknown 1=string, 2=number 3=mixed +--         for key in next, tab do +--             s = s + 1 +--             srt[s] = key +--             if category == 3 then +--                 -- no further check +--             else +--                 local tkey = type(key) +--                 if tkey == "string" then +--                     category = (category == 2 and 3) or 1 +--                 elseif tkey == "number" then +--                     category = (category == 1 and 3) or 2 +--                 else +--                     category = 3 +--                 end +--             end +--         end +--         if category == 0 or category == 3 then +--             sort(srt,compare) +--         else +--             sort(srt) +--         end +--         return srt +--     else +--         return { } +--     end +-- end + +-- local function compare(a,b) +--     local ta = type(a) -- needed, else 11 < 2 +--     local tb = type(b) -- needed, else 11 < 2 +--     if ta == tb and (ta == "number" or ta == "string") then +--         return a < b +--     else +--         return tostring(a) < tostring(b) -- not that efficient +--     end +-- end + +-- local function compare(a,b) +--     local ta = type(a) -- needed, else 11 < 2 +--     if ta == "number" or ta == "string" then +--         local tb = type(b) -- needed, else 11 < 2 +--         if ta == tb then +--             return a < b +--         end +--     end +--     return tostring(a) < tostring(b) -- not that efficient +-- end +  local function compare(a,b) -    local ta, tb = type(a), type(b) -- needed, else 11 < 2 -    if ta == tb then -        return a < b -    else -        return tostring(a) < tostring(b) +    local ta = type(a) -- needed, else 11 < 2 +    if ta == "number" then +        local tb = type(b) -- needed, else 11 < 2 +        if ta == tb then +            return a < b +        elseif tb == "string" then +            return tostring(a) < b +        end +    elseif ta == "string" then +        local tb = type(b) -- needed, else 11 < 2 +        if ta == tb then +            return a < b +        else +            return a < tostring(b) +        end      end +    return tostring(a) < tostring(b) -- not that efficient  end  local function sortedkeys(tab)      if tab then          local srt, category, s = { }, 0, 0 -- 0=unknown 1=string, 2=number 3=mixed -        for key,_ in next, tab do +        for key in next, tab do              s = s + 1              srt[s] = key              if category == 3 then                  -- no further check +            elseif category == 1 then +                if type(key) ~= "string" then +                    category = 3 +                end +            elseif category == 2 then +                if type(key) ~= "number" then +                    category = 3 +                end              else                  local tkey = type(key)                  if tkey == "string" then -                    category = (category == 2 and 3) or 1 +                    category = 1                  elseif tkey == "number" then -                    category = (category == 1 and 3) or 2 +                    category = 2                  else                      category = 3                  end              end          end -        if category == 0 or category == 3 then +        if s < 2 then +            -- nothing to sort +        elseif category == 3 then              sort(srt,compare)          else              sort(srt) @@ -91,13 +183,15 @@ end  local function sortedhashonly(tab)      if tab then          local srt, s = { }, 0 -        for key,_ in next, tab do +        for key in next, tab do              if type(key) == "string" then                  s = s + 1                  srt[s] = key              end          end -        sort(srt) +        if s > 1 then +            sort(srt) +        end          return srt      else          return { } @@ -107,13 +201,15 @@ end  local function sortedindexonly(tab)      if tab then          local srt, s = { }, 0 -        for key,_ in next, tab do +        for key in next, tab do              if type(key) == "number" then                  s = s + 1                  srt[s] = key              end          end -        sort(srt) +        if s > 1 then +            sort(srt) +        end          return srt      else          return { } @@ -123,13 +219,15 @@ end  local function sortedhashkeys(tab,cmp) -- fast one      if tab then          local srt, s = { }, 0 -        for key,_ in next, tab do +        for key in next, tab do              if key then                  s= s + 1                  srt[s] = key              end          end -        sort(srt,cmp) +        if s > 1 then +            sort(srt,cmp) +        end          return srt      else          return { } @@ -139,7 +237,7 @@ end  function table.allkeys(t)      local keys = { }      for k, v in next, t do -        for k, v in next, v do +        for k in next, v do              keys[k] = true          end      end @@ -162,19 +260,21 @@ local function sortedhash(t,cmp)          else              s = sortedkeys(t) -- the robust one          end -        local n = 0          local m = #s -        local function kv() -- (s) -            if n < m then -                n = n + 1 -                local k = s[n] -                return k, t[k] +        if m == 1 then +            return next, t +        elseif m > 0 then +            local n = 0 +            return function() +                if n < m then +                    n = n + 1 +                    local k = s[n] +                    return k, t[k] +                end              end          end -        return kv -- , s -    else -        return nothing      end +    return nothing  end  table.sortedhash  = sortedhash @@ -318,7 +418,7 @@ end  local function copy(t, tables) -- taken from lua wiki, slightly adapted      tables = tables or { } -    local tcopy = {} +    local tcopy = { }      if not tables[t] then          tables[t] = tcopy      end @@ -378,7 +478,7 @@ function table.fromhash(t)      return hsh  end -local noquotes, hexify, handle, reduce, compact, inline, functions +local noquotes, hexify, handle, compact, inline, functions  local reserved = table.tohash { -- intercept a language inconvenience: no reserved words as key      'and', 'break', 'do', 'else', 'elseif', 'end', 'false', 'for', 'function', 'if', @@ -386,33 +486,67 @@ local reserved = table.tohash { -- intercept a language inconvenience: no reserv      'NaN', 'goto',  } +-- local function simple_table(t) +--     if #t > 0 then +--         local n = 0 +--         for _,v in next, t do +--             n = n + 1 +--         end +--         if n == #t then +--             local tt, nt = { }, 0 +--             for i=1,#t do +--                 local v = t[i] +--                 local tv = type(v) +--                 if tv == "number" then +--                     nt = nt + 1 +--                     if hexify then +--                         tt[nt] = format("0x%X",v) +--                     else +--                         tt[nt] = tostring(v) -- tostring not needed +--                     end +--                 elseif tv == "string" then +--                     nt = nt + 1 +--                     tt[nt] = format("%q",v) +--                 elseif tv == "boolean" then +--                     nt = nt + 1 +--                     tt[nt] = v and "true" or "false" +--                 else +--                     return nil +--                 end +--             end +--             return tt +--         end +--     end +--     return nil +-- end +  local function simple_table(t) -    if #t > 0 then +    local nt = #t +    if nt > 0 then          local n = 0          for _,v in next, t do              n = n + 1 +         -- if type(v) == "table" then +         --     return nil +         -- end          end -        if n == #t then -            local tt, nt = { }, 0 -            for i=1,#t do +        if n == nt then +            local tt = { } +            for i=1,nt do                  local v = t[i]                  local tv = type(v)                  if tv == "number" then -                    nt = nt + 1                      if hexify then -                        tt[nt] = format("0x%X",v) +                        tt[i] = format("0x%X",v)                      else -                        tt[nt] = tostring(v) -- tostring not needed +                        tt[i] = tostring(v) -- tostring not needed                      end                  elseif tv == "string" then -                    nt = nt + 1 -                    tt[nt] = format("%q",v) +                    tt[i] = format("%q",v)                  elseif tv == "boolean" then -                    nt = nt + 1 -                    tt[nt] = v and "true" or "false" +                    tt[i] = v and "true" or "false"                  else -                    tt = nil -                    break +                    return nil                  end              end              return tt @@ -469,15 +603,7 @@ local function do_serialize(root,name,depth,level,indexed)          end      end      -- we could check for k (index) being number (cardinal) -    if root and next(root) then -     -- local first, last = nil, 0 -- #root cannot be trusted here (will be ok in 5.2 when ipairs is gone) -     -- if compact then -     --     -- NOT: for k=1,#root do (we need to quit at nil) -     --     for k,v in ipairs(root) do -- can we use next? -     --         if not first then first = k end -     --         last = last + 1 -     --     end -     -- end +    if root and next(root) ~= nil then          local first, last = nil, 0          if compact then              last = #root @@ -493,12 +619,10 @@ local function do_serialize(root,name,depth,level,indexed)          end          local sk = sortedkeys(root)          for i=1,#sk do -            local k = sk[i] -            local v = root[k] -            --~ if v == root then -                -- circular -            --~ else -            local tv, tk = type(v), type(k) +            local k  = sk[i] +            local v  = root[k] +            local tv = type(v) +            local tk = type(k)              if compact and first and tk == "number" and k >= first and k <= last then                  if tv == "number" then                      if hexify then @@ -507,13 +631,9 @@ local function do_serialize(root,name,depth,level,indexed)                          handle(format("%s %s,",depth,v)) -- %.99g                      end                  elseif tv == "string" then -                    if reduce and tonumber(v) then -                        handle(format("%s %s,",depth,v)) -                    else -                        handle(format("%s %q,",depth,v)) -                    end +                    handle(format("%s %q,",depth,v))                  elseif tv == "table" then -                    if not next(v) then +                    if next(v) == nil then                          handle(format("%s {},",depth))                      elseif inline then -- and #t > 0                          local st = simple_table(v) @@ -567,37 +687,21 @@ local function do_serialize(root,name,depth,level,indexed)                      end                  end              elseif tv == "string" then -                if reduce and tonumber(v) then -                    if tk == "number" then -                        if hexify then -                            handle(format("%s [0x%X]=%s,",depth,k,v)) -                        else -                            handle(format("%s [%s]=%s,",depth,k,v)) -                        end -                    elseif tk == "boolean" then -                        handle(format("%s [%s]=%s,",depth,k and "true" or "false",v)) -                    elseif noquotes and not reserved[k] and lpegmatch(propername,k) then -                        handle(format("%s %s=%s,",depth,k,v)) +                if tk == "number" then +                    if hexify then +                        handle(format("%s [0x%X]=%q,",depth,k,v))                      else -                        handle(format("%s [%q]=%s,",depth,k,v)) +                        handle(format("%s [%s]=%q,",depth,k,v))                      end +                elseif tk == "boolean" then +                    handle(format("%s [%s]=%q,",depth,k and "true" or "false",v)) +                elseif noquotes and not reserved[k] and lpegmatch(propername,k) then +                    handle(format("%s %s=%q,",depth,k,v))                  else -                    if tk == "number" then -                        if hexify then -                            handle(format("%s [0x%X]=%q,",depth,k,v)) -                        else -                            handle(format("%s [%s]=%q,",depth,k,v)) -                        end -                    elseif tk == "boolean" then -                        handle(format("%s [%s]=%q,",depth,k and "true" or "false",v)) -                    elseif noquotes and not reserved[k] and lpegmatch(propername,k) then -                        handle(format("%s %s=%q,",depth,k,v)) -                    else -                        handle(format("%s [%q]=%q,",depth,k,v)) -                    end +                    handle(format("%s [%q]=%q,",depth,k,v))                  end              elseif tv == "table" then -                if not next(v) then +                if next(v) == nil then                      if tk == "number" then                          if hexify then                              handle(format("%s [0x%X]={},",depth,k)) @@ -680,10 +784,9 @@ local function do_serialize(root,name,depth,level,indexed)                      handle(format("%s [%q]=%q,",depth,k,tostring(v)))                  end              end -            --~ end          end      end -   if level > 0 then +    if level > 0 then          handle(format("%s},",depth))      end  end @@ -697,7 +800,6 @@ local function serialize(_handle,root,name,specification) -- handle wins          noquotes  = specification.noquotes          hexify    = specification.hexify          handle    = _handle or specification.handle or print -        reduce    = specification.reduce or false          functions = specification.functions          compact   = specification.compact          inline    = specification.inline and compact @@ -714,7 +816,6 @@ local function serialize(_handle,root,name,specification) -- handle wins          noquotes  = false          hexify    = false          handle    = _handle or print -        reduce    = false          compact   = true          inline    = true          functions = true @@ -748,7 +849,7 @@ local function serialize(_handle,root,name,specification) -- handle wins              root._w_h_a_t_e_v_e_r_ = nil          end          -- Let's forget about empty tables. -        if next(root) then +        if next(root) ~= nil then              do_serialize(root,name,"",0)          end      end @@ -788,15 +889,6 @@ end  table.tohandle = serialize --- sometimes tables are real use (zapfino extra pro is some 85M) in which --- case a stepwise serialization is nice; actually, we could consider: --- --- for line in table.serializer(root,name,reduce,noquotes) do ---    ...(line) --- end --- --- so this is on the todo list -  local maxtab = 2*1024  function table.tofile(filename,root,name,specification) @@ -928,7 +1020,7 @@ local function sparse(old,nest,keeptables)          if not (v == "" or v == false) then              if nest and type(v) == "table" then                  v = sparse(v,nest) -                if keeptables or next(v) then +                if keeptables or next(v) ~= nil then                      new[k] = v                  end              else @@ -1052,7 +1144,7 @@ function table.print(t,...)  end  if setinspector then -    setinspector(function(v) if type(v) == "table" then serialize(print,v,"table") return true end end) +    setinspector("table",function(v) if type(v) == "table" then serialize(print,v,"table") return true end end)  end  -- -- -- obsolete but we keep them for a while and might comment them later -- -- -- @@ -1066,11 +1158,11 @@ end  -- slower than #t on indexed tables (#t only returns the size of the numerically indexed slice)  function table.is_empty(t) -    return not t or not next(t) +    return not t or next(t) == nil  end  function table.has_one_entry(t) -    return t and not next(t,next(t)) +    return t and next(t,next(t)) == nil  end  -- new @@ -1157,7 +1249,7 @@ function table.filtered(t,pattern,sort,cmp)          else              local n = next(t)              local function iterator() -                while n do +                while n ~= nil do                      local k = n                      n = next(t,k)                      if find(k,pattern) then diff --git a/lualibs-trac-inf.lua b/lualibs-trac-inf.lua index 034726f..5497e54 100644 --- a/lualibs-trac-inf.lua +++ b/lualibs-trac-inf.lua @@ -207,17 +207,3 @@ function statistics.tracefunction(base,tag,...)          statistics.register(formatters["%s.%s"](tag,name),function() return serialize(stat,"calls") end)      end  end - --- where, not really the best spot for this: - -commands = commands or { } - -function commands.resettimer(name) -    resettiming(name or "whatever") -    starttiming(name or "whatever") -end - -function commands.elapsedtime(name) -    stoptiming(name or "whatever") -    context(elapsedtime(name or "whatever")) -end diff --git a/lualibs-unicode.lua b/lualibs-unicode.lua index fb4ea37..70b6032 100644 --- a/lualibs-unicode.lua +++ b/lualibs-unicode.lua @@ -56,7 +56,6 @@ local p_utfbom        = patterns.utfbom  local p_newline       = patterns.newline  local p_whitespace    = patterns.whitespace -  if not unicode then      unicode = { utf = utf } -- for a while @@ -419,9 +418,11 @@ if not utf.sub then          end      end -    local pattern_zero = Cmt(p_utf8char,slide_zero)^0 -    local pattern_one  = Cmt(p_utf8char,slide_one )^0 -    local pattern_two  = Cmt(p_utf8char,slide_two )^0 +    local pattern_zero  = Cmt(p_utf8char,slide_zero)^0 +    local pattern_one   = Cmt(p_utf8char,slide_one )^0 +    local pattern_two   = Cmt(p_utf8char,slide_two )^0 + +    local pattern_first = C(patterns.utf8character)      function utf.sub(str,start,stop)          if not start then @@ -464,7 +465,9 @@ if not utf.sub then                  end              end          end -        if start > stop then +        if start == 1 and stop == 1 then +            return lpegmatch(pattern_first,str) or "" +        elseif start > stop then              return ""          elseif start > 1 then              b, e, n, first, last = 0, 0, 0, start - 1, stop @@ -525,23 +528,59 @@ end  --     end, pattern  -- end -function utf.remapper(mapping) -    local pattern = type(mapping) == "table" and tabletopattern(mapping) or p_utf8char -    local pattern = Cs((pattern/mapping + p_utf8char)^0) -    return function(str) -        if not str or str == "" then -            return "" +function utf.remapper(mapping,option) -- static also returns a pattern +    local variant = type(mapping) +    if variant == "table" then +        if option == "dynamic" then +            local pattern = false +            table.setmetatablenewindex(mapping,function(t,k,v) rawset(t,k,v) pattern = false end) +            return function(str) +                if not str or str == "" then +                    return "" +                else +                    if not pattern then +                        pattern = Cs((tabletopattern(mapping)/mapping + p_utf8char)^0) +                    end +                    return lpegmatch(pattern,str) +                end +            end +        elseif option == "pattern" then +            return Cs((tabletopattern(mapping)/mapping + p_utf8char)^0) +     -- elseif option == "static" then +        else +            local pattern = Cs((tabletopattern(mapping)/mapping + p_utf8char)^0) +            return function(str) +                if not str or str == "" then +                    return "" +                else +                    return lpegmatch(pattern,str) +                end +            end, pattern +        end +    elseif variant == "function" then +        if option == "pattern" then +            return Cs((p_utf8char/mapping + p_utf8char)^0)          else -            return lpegmatch(pattern,str) +            local pattern = Cs((p_utf8char/mapping + p_utf8char)^0) +            return function(str) +                if not str or str == "" then +                    return "" +                else +                    return lpegmatch(pattern,str) +                end +            end, pattern          end -    end, pattern +    else +        -- is actually an error +        return function(str) +            return str or "" +        end +    end  end  -- local remap = utf.remapper { a = 'd', b = "c", c = "b", d = "a" }  -- print(remap("abcd 1234 abcd")) --- -  function utf.replacer(t) -- no precheck, always string builder      local r = replacer(t,false,false,true)      return function(str) @@ -647,285 +686,359 @@ end  local utf16_to_utf8_be, utf16_to_utf8_le  local utf32_to_utf8_be, utf32_to_utf8_le -local utf_16_be_linesplitter = patterns.utfbom_16_be^-1 * lpeg.tsplitat(patterns.utf_16_be_nl) -local utf_16_le_linesplitter = patterns.utfbom_16_le^-1 * lpeg.tsplitat(patterns.utf_16_le_nl) +local utf_16_be_getbom = patterns.utfbom_16_be^-1 +local utf_16_le_getbom = patterns.utfbom_16_le^-1 +local utf_32_be_getbom = patterns.utfbom_32_be^-1 +local utf_32_le_getbom = patterns.utfbom_32_le^-1 + +local utf_16_be_linesplitter = utf_16_be_getbom * lpeg.tsplitat(patterns.utf_16_be_nl) +local utf_16_le_linesplitter = utf_16_le_getbom * lpeg.tsplitat(patterns.utf_16_le_nl) +local utf_32_be_linesplitter = utf_32_be_getbom * lpeg.tsplitat(patterns.utf_32_be_nl) +local utf_32_le_linesplitter = utf_32_le_getbom * lpeg.tsplitat(patterns.utf_32_le_nl) + +-- we have three possibilities: bytepairs (using tables), gmatch (using tables), gsub and +-- lpeg. Bytepairs are the fastert but as soon as we need to remove bombs and so the gain +-- is less due to more testing. Also, we seldom have to convert utf16 so we don't care to +-- much about a few  milliseconds more runtime. The lpeg variant is upto 20% slower but +-- still pretty fast. +-- +-- for historic resone we keep the bytepairs variants around .. beware they don't grab the +-- bom like the lpegs do so they're not dropins in the functions that follow +-- +-- utf16_to_utf8_be = function(s) +--     if not s then +--         return nil +--     elseif s == "" then +--         return "" +--     end +--     local result, r, more = { }, 0, 0 +--     for left, right in bytepairs(s) do +--         if right then +--             local now = 256*left + right +--             if more > 0 then +--                 now = (more-0xD800)*0x400 + (now-0xDC00) + 0x10000 -- the 0x10000 smells wrong +--                 more = 0 +--                 r = r + 1 +--                 result[r] = utfchar(now) +--             elseif now >= 0xD800 and now <= 0xDBFF then +--                 more = now +--             else +--                 r = r + 1 +--                 result[r] = utfchar(now) +--             end +--         end +--     end +--     return concat(result) +-- end +-- +-- local utf16_to_utf8_be_t = function(t) +--     if not t then +--         return nil +--     elseif type(t) == "string" then +--         t = lpegmatch(utf_16_be_linesplitter,t) +--     end +--     local result = { } -- we reuse result +--     for i=1,#t do +--         local s = t[i] +--         if s ~= "" then +--             local r, more = 0, 0 +--             for left, right in bytepairs(s) do +--                 if right then +--                     local now = 256*left + right +--                     if more > 0 then +--                         now = (more-0xD800)*0x400 + (now-0xDC00) + 0x10000 -- the 0x10000 smells wrong +--                         more = 0 +--                         r = r + 1 +--                         result[r] = utfchar(now) +--                     elseif now >= 0xD800 and now <= 0xDBFF then +--                         more = now +--                     else +--                         r = r + 1 +--                         result[r] = utfchar(now) +--                     end +--                 end +--             end +--             t[i] = concat(result,"",1,r) -- we reused tmp, hence t +--         end +--     end +--     return t +-- end +-- +-- utf16_to_utf8_le = function(s) +--     if not s then +--         return nil +--     elseif s == "" then +--         return "" +--     end +--     local result, r, more = { }, 0, 0 +--     for left, right in bytepairs(s) do +--         if right then +--             local now = 256*right + left +--             if more > 0 then +--                 now = (more-0xD800)*0x400 + (now-0xDC00) + 0x10000 -- the 0x10000 smells wrong +--                 more = 0 +--                 r = r + 1 +--                 result[r] = utfchar(now) +--             elseif now >= 0xD800 and now <= 0xDBFF then +--                 more = now +--             else +--                 r = r + 1 +--                 result[r] = utfchar(now) +--             end +--         end +--     end +--     return concat(result) +-- end +-- +-- local utf16_to_utf8_le_t = function(t) +--     if not t then +--         return nil +--     elseif type(t) == "string" then +--         t = lpegmatch(utf_16_le_linesplitter,t) +--     end +--     local result = { } -- we reuse result +--     for i=1,#t do +--         local s = t[i] +--         if s ~= "" then +--             local r, more = 0, 0 +--             for left, right in bytepairs(s) do +--                 if right then +--                     local now = 256*right + left +--                     if more > 0 then +--                         now = (more-0xD800)*0x400 + (now-0xDC00) + 0x10000 -- the 0x10000 smells wrong +--                         more = 0 +--                         r = r + 1 +--                         result[r] = utfchar(now) +--                     elseif now >= 0xD800 and now <= 0xDBFF then +--                         more = now +--                     else +--                         r = r + 1 +--                         result[r] = utfchar(now) +--                     end +--                 end +--             end +--             t[i] = concat(result,"",1,r) -- we reused tmp, hence t +--         end +--     end +--     return t +-- end +-- +-- local utf32_to_utf8_be_t = function(t) +--     if not t then +--         return nil +--     elseif type(t) == "string" then +--         t = lpegmatch(utflinesplitter,t) +--     end +--     local result = { } -- we reuse result +--     for i=1,#t do +--         local r, more = 0, -1 +--         for a,b in bytepairs(t[i]) do +--             if a and b then +--                 if more < 0 then +--                     more = 256*256*256*a + 256*256*b +--                 else +--                     r = r + 1 +--                     result[t] = utfchar(more + 256*a + b) +--                     more = -1 +--                 end +--             else +--                 break +--             end +--         end +--         t[i] = concat(result,"",1,r) +--     end +--     return t +-- end +-- +-- local utf32_to_utf8_le_t = function(t) +--     if not t then +--         return nil +--     elseif type(t) == "string" then +--         t = lpegmatch(utflinesplitter,t) +--     end +--     local result = { } -- we reuse result +--     for i=1,#t do +--         local r, more = 0, -1 +--         for a,b in bytepairs(t[i]) do +--             if a and b then +--                 if more < 0 then +--                     more = 256*b + a +--                 else +--                     r = r + 1 +--                     result[t] = utfchar(more + 256*256*256*b + 256*256*a) +--                     more = -1 +--                 end +--             else +--                 break +--             end +--         end +--         t[i] = concat(result,"",1,r) +--     end +--     return t +-- end + +local more = 0 + +local p_utf16_to_utf8_be = C(1) * C(1) /function(left,right) +    local now = 256*byte(left) + byte(right) +    if more > 0 then +        now = (more-0xD800)*0x400 + (now-0xDC00) + 0x10000 -- the 0x10000 smells wrong +        more = 0 +        return utfchar(now) +    elseif now >= 0xD800 and now <= 0xDBFF then +        more = now +        return "" -- else the c's end up in the stream +    else +        return utfchar(now) +    end +end --- we have three possibilities: +local p_utf16_to_utf8_le = C(1) * C(1) /function(right,left) +    local now = 256*byte(left) + byte(right) +    if more > 0 then +        now = (more-0xD800)*0x400 + (now-0xDC00) + 0x10000 -- the 0x10000 smells wrong +        more = 0 +        return utfchar(now) +    elseif now >= 0xD800 and now <= 0xDBFF then +        more = now +        return "" -- else the c's end up in the stream +    else +        return utfchar(now) +    end +end +local p_utf32_to_utf8_be = C(1) * C(1) * C(1) * C(1) /function(a,b,c,d) +    return utfchar(256*256*256*byte(a) + 256*256*byte(b) + 256*byte(c) + byte(d)) +end --- bytepairs: 0.048 --- gmatch   : 0.069 --- lpeg     : 0.089 (match time captures) +local p_utf32_to_utf8_le = C(1) * C(1) * C(1) * C(1) /function(a,b,c,d) +    return utfchar(256*256*256*byte(d) + 256*256*byte(c) + 256*byte(b) + byte(a)) +end -if bytepairs then +p_utf16_to_utf8_be = P(true) / function() more = 0 end * utf_16_be_getbom * Cs(p_utf16_to_utf8_be^0) +p_utf16_to_utf8_le = P(true) / function() more = 0 end * utf_16_le_getbom * Cs(p_utf16_to_utf8_le^0) +p_utf32_to_utf8_be = P(true) / function() more = 0 end * utf_32_be_getbom * Cs(p_utf32_to_utf8_be^0) +p_utf32_to_utf8_le = P(true) / function() more = 0 end * utf_32_le_getbom * Cs(p_utf32_to_utf8_le^0) -    -- with a little bit more code we could include the linesplitter +patterns.utf16_to_utf8_be = p_utf16_to_utf8_be +patterns.utf16_to_utf8_le = p_utf16_to_utf8_le +patterns.utf32_to_utf8_be = p_utf32_to_utf8_be +patterns.utf32_to_utf8_le = p_utf32_to_utf8_le -    utf16_to_utf8_be = function(t) -        if type(t) == "string" then -            t = lpegmatch(utf_16_be_linesplitter,t) -        end -        local result = { } -- we reuse result -        for i=1,#t do -            local r, more = 0, 0 -            for left, right in bytepairs(t[i]) do -                if right then -                    local now = 256*left + right -                    if more > 0 then -                        now = (more-0xD800)*0x400 + (now-0xDC00) + 0x10000 -- the 0x10000 smells wrong -                        more = 0 -                        r = r + 1 -                        result[r] = utfchar(now) -                    elseif now >= 0xD800 and now <= 0xDBFF then -                        more = now -                    else -                        r = r + 1 -                        result[r] = utfchar(now) -                    end -                end -            end -            t[i] = concat(result,"",1,r) -- we reused tmp, hence t -        end -        return t +utf16_to_utf8_be = function(s) +    if s and s ~= "" then +        return lpegmatch(p_utf16_to_utf8_be,s) +    else +        return s      end +end -    utf16_to_utf8_le = function(t) -        if type(t) == "string" then -            t = lpegmatch(utf_16_le_linesplitter,t) -        end -        local result = { } -- we reuse result -        for i=1,#t do -            local r, more = 0, 0 -            for left, right in bytepairs(t[i]) do -                if right then -                    local now = 256*right + left -                    if more > 0 then -                        now = (more-0xD800)*0x400 + (now-0xDC00) + 0x10000 -- the 0x10000 smells wrong -                        more = 0 -                        r = r + 1 -                        result[r] = utfchar(now) -                    elseif now >= 0xD800 and now <= 0xDBFF then -                        more = now -                    else -                        r = r + 1 -                        result[r] = utfchar(now) -                    end -                end -            end -            t[i] = concat(result,"",1,r) -- we reused tmp, hence t +local utf16_to_utf8_be_t = function(t) +    if not t then +        return nil +    elseif type(t) == "string" then +        t = lpegmatch(utf_16_be_linesplitter,t) +    end +    for i=1,#t do +        local s = t[i] +        if s ~= "" then +            t[i] = lpegmatch(p_utf16_to_utf8_be,s)          end -        return t      end +    return t +end -    utf32_to_utf8_be = function(t) -        if type(t) == "string" then -            t = lpegmatch(utflinesplitter,t) -        end -        local result = { } -- we reuse result -        for i=1,#t do -            local r, more = 0, -1 -            for a,b in bytepairs(t[i]) do -                if a and b then -                    if more < 0 then -                        more = 256*256*256*a + 256*256*b -                    else -                        r = r + 1 -                        result[t] = utfchar(more + 256*a + b) -                        more = -1 -                    end -                else -                    break -                end -            end -            t[i] = concat(result,"",1,r) -        end -        return t +utf16_to_utf8_le = function(s) +    if s and s ~= "" then +        return lpegmatch(p_utf16_to_utf8_le,s) +    else +        return s      end +end -    utf32_to_utf8_le = function(t) -        if type(t) == "string" then -            t = lpegmatch(utflinesplitter,t) -        end -        local result = { } -- we reuse result -        for i=1,#t do -            local r, more = 0, -1 -            for a,b in bytepairs(t[i]) do -                if a and b then -                    if more < 0 then -                        more = 256*b + a -                    else -                        r = r + 1 -                        result[t] = utfchar(more + 256*256*256*b + 256*256*a) -                        more = -1 -                    end -                else -                    break -                end -            end -            t[i] = concat(result,"",1,r) +local utf16_to_utf8_le_t = function(t) +    if not t then +        return nil +    elseif type(t) == "string" then +        t = lpegmatch(utf_16_le_linesplitter,t) +    end +    for i=1,#t do +        local s = t[i] +        if s ~= "" then +            t[i] = lpegmatch(p_utf16_to_utf8_le,s)          end -        return t      end +    return t +end -else - -    utf16_to_utf8_be = function(t) -        if type(t) == "string" then -            t = lpegmatch(utf_16_be_linesplitter,t) -        end -        local result = { } -- we reuse result -        for i=1,#t do -            local r, more = 0, 0 -            for left, right in gmatch(t[i],"(.)(.)") do -                if left == "\000" then -- experiment -                    r = r + 1 -                    result[r] = utfchar(byte(right)) -                elseif right then -                    local now = 256*byte(left) + byte(right) -                    if more > 0 then -                        now = (more-0xD800)*0x400 + (now-0xDC00) + 0x10000 -- the 0x10000 smells wrong -                        more = 0 -                        r = r + 1 -                        result[r] = utfchar(now) -                    elseif now >= 0xD800 and now <= 0xDBFF then -                        more = now -                    else -                        r = r + 1 -                        result[r] = utfchar(now) -                    end -                end -            end -            t[i] = concat(result,"",1,r) -- we reused tmp, hence t -        end -        return t +utf32_to_utf8_be = function(s) +    if s and s ~= "" then +        return lpegmatch(p_utf32_to_utf8_be,s) +    else +        return s      end +end -    utf16_to_utf8_le = function(t) -        if type(t) == "string" then -            t = lpegmatch(utf_16_le_linesplitter,t) -        end -        local result = { } -- we reuse result -        for i=1,#t do -            local r, more = 0, 0 -            for left, right in gmatch(t[i],"(.)(.)") do -                if right == "\000" then -                    r = r + 1 -                    result[r] = utfchar(byte(left)) -                elseif right then -                    local now = 256*byte(right) + byte(left) -                    if more > 0 then -                        now = (more-0xD800)*0x400 + (now-0xDC00) + 0x10000 -- the 0x10000 smells wrong -                        more = 0 -                        r = r + 1 -                        result[r] = utfchar(now) -                    elseif now >= 0xD800 and now <= 0xDBFF then -                        more = now -                    else -                        r = r + 1 -                        result[r] = utfchar(now) -                    end -                end -            end -            t[i] = concat(result,"",1,r) -- we reused tmp, hence t +local utf32_to_utf8_be_t = function(t) +    if not t then +        return nil +    elseif type(t) == "string" then +        t = lpegmatch(utf_32_be_linesplitter,t) +    end +    for i=1,#t do +        local s = t[i] +        if s ~= "" then +            t[i] = lpegmatch(p_utf32_to_utf8_be,s)          end -        return t      end +    return t +end -    utf32_to_utf8_le = function() return { } end -- never used anyway -    utf32_to_utf8_be = function() return { } end -- never used anyway - -    -- the next one is slighty slower - -    -- local result, lines, r, more = { }, { }, 0, 0 -    -- -    -- local simple = Cmt( -    --     C(1) * C(1), function(str,p,left,right) -    --         local now = 256*byte(left) + byte(right) -    --         if more > 0 then -    --             now = (more-0xD800)*0x400 + (now-0xDC00) + 0x10000 -- the 0x10000 smells wrong -    --             more = 0 -    --             r = r + 1 -    --             result[r] = utfchar(now) -    --         elseif now >= 0xD800 and now <= 0xDBFF then -    --             more = now -    --         else -    --             r = r + 1 -    --             result[r] = utfchar(now) -    --         end -    --         return p -    --    end -    -- ) -    -- -    -- local complex = Cmt( -    --     C(1) * C(1), function(str,p,left,right) -    --         local now = 256*byte(left) + byte(right) -    --         if more > 0 then -    --             now = (more-0xD800)*0x400 + (now-0xDC00) + 0x10000 -- the 0x10000 smells wrong -    --             more = 0 -    --             r = r + 1 -    --             result[r] = utfchar(now) -    --         elseif now >= 0xD800 and now <= 0xDBFF then -    --             more = now -    --         else -    --             r = r + 1 -    --             result[r] = utfchar(now) -    --         end -    --         return p -    --    end -    -- ) -    -- -    -- local lineend = Cmt ( -    --     patterns.utf_16_be_nl, function(str,p) -    --         lines[#lines+1] = concat(result,"",1,r) -    --         r, more = 0, 0 -    --         return p -    --     end -    -- ) -    -- -    -- local be_1 = patterns.utfbom_16_be^-1 * (simple + complex)^0 -    -- local be_2 = patterns.utfbom_16_be^-1 * (lineend + simple + complex)^0 -    -- -    -- utf16_to_utf8_be = function(t) -    --     if type(t) == "string" then -    --         local s = t -    --         lines, r, more = { }, 0, 0 -    --         lpegmatch(be_2,s) -    --         if r > 0 then -    --             lines[#lines+1] = concat(result,"",1,r) -    --         end -    --         result = { } -    --         return lines -    --     else -    --         for i=1,#t do -    --             r, more = 0, 0 -    --             lpegmatch(be_1,t[i]) -    --             t[i] = concat(result,"",1,r) -    --         end -    --         result = { } -    --         return t -    --     end -    -- end +utf32_to_utf8_le = function(s) +    if s and s ~= "" then +        return lpegmatch(p_utf32_to_utf8_le,s) +    else +        return s +    end +end +local utf32_to_utf8_le_t = function(t) +    if not t then +        return nil +    elseif type(t) == "string" then +        t = lpegmatch(utf_32_le_linesplitter,t) +    end +    for i=1,#t do +        local s = t[i] +        if s ~= "" then +            t[i] = lpegmatch(p_utf32_to_utf8_le,s) +        end +    end +    return t  end -utf.utf16_to_utf8_le = utf16_to_utf8_le -utf.utf16_to_utf8_be = utf16_to_utf8_be -utf.utf32_to_utf8_le = utf32_to_utf8_le -utf.utf32_to_utf8_be = utf32_to_utf8_be +utf.utf16_to_utf8_le_t = utf16_to_utf8_le_t +utf.utf16_to_utf8_be_t = utf16_to_utf8_be_t +utf.utf32_to_utf8_le_t = utf32_to_utf8_le_t +utf.utf32_to_utf8_be_t = utf32_to_utf8_be_t -function utf.utf8_to_utf8(t) +utf.utf16_to_utf8_le   = utf16_to_utf8_le +utf.utf16_to_utf8_be   = utf16_to_utf8_be +utf.utf32_to_utf8_le   = utf32_to_utf8_le +utf.utf32_to_utf8_be   = utf32_to_utf8_be + +function utf.utf8_to_utf8_t(t)      return type(t) == "string" and lpegmatch(utflinesplitter,t) or t  end -function utf.utf16_to_utf8(t,endian) -    return endian and utf16_to_utf8_be(t) or utf16_to_utf8_le(t) or t +function utf.utf16_to_utf8_t(t,endian) +    return endian and utf16_to_utf8_be_t(t) or utf16_to_utf8_le_t(t) or t  end -function utf.utf32_to_utf8(t,endian) -    return endian and utf32_to_utf8_be(t) or utf32_to_utf8_le(t) or t +function utf.utf32_to_utf8_t(t,endian) +    return endian and utf32_to_utf8_be_t(t) or utf32_to_utf8_le_t(t) or t  end -local function little(c) -    local b = byte(c) +local function little(b)      if b < 0x10000 then          return char(b%256,b/256)      else @@ -935,8 +1048,7 @@ local function little(c)      end  end -local function big(c) -    local b = byte(c) +local function big(b)      if b < 0x10000 then          return char(b/256,b%256)      else @@ -946,18 +1058,10 @@ local function big(c)      end  end --- function utf.utf8_to_utf16(str,littleendian) ---     if littleendian then ---         return char(255,254) .. utfgsub(str,".",little) ---     else ---         return char(254,255) .. utfgsub(str,".",big) ---     end --- end - -local _, l_remap = utf.remapper(little) -local _, b_remap = utf.remapper(big) +local l_remap = Cs((p_utf8byte/little+P(1)/"")^0) +local b_remap = Cs((p_utf8byte/big   +P(1)/"")^0) -function utf.utf8_to_utf16_be(str,nobom) +local function utf8_to_utf16_be(str,nobom)      if nobom then          return lpegmatch(b_remap,str)      else @@ -965,7 +1069,7 @@ function utf.utf8_to_utf16_be(str,nobom)      end  end -function utf.utf8_to_utf16_le(str,nobom) +local function utf8_to_utf16_le(str,nobom)      if nobom then          return lpegmatch(l_remap,str)      else @@ -973,11 +1077,14 @@ function utf.utf8_to_utf16_le(str,nobom)      end  end +utf.utf8_to_utf16_be = utf8_to_utf16_be +utf.utf8_to_utf16_le = utf8_to_utf16_le +  function utf.utf8_to_utf16(str,littleendian,nobom)      if littleendian then -        return utf.utf8_to_utf16_le(str,nobom) +        return utf8_to_utf16_le(str,nobom)      else -        return utf.utf8_to_utf16_be(str,nobom) +        return utf8_to_utf16_be(str,nobom)      end  end @@ -1008,16 +1115,16 @@ function utf.xstring(s)  end  function utf.toeight(str) -    if not str then +    if not str or str == "" then          return nil      end      local utftype = lpegmatch(p_utfstricttype,str)      if utftype == "utf-8" then -        return sub(str,4) -    elseif utftype == "utf-16-le" then -        return utf16_to_utf8_le(str) +        return sub(str,4)               -- remove the bom      elseif utftype == "utf-16-be" then -        return utf16_to_utf8_ne(str) +        return utf16_to_utf8_be(str)    -- bom gets removed +    elseif utftype == "utf-16-le" then +        return utf16_to_utf8_le(str)    -- bom gets removed      else          return str      end diff --git a/lualibs-url.lua b/lualibs-url.lua index 7bb7312..b189ec5 100644 --- a/lualibs-url.lua +++ b/lualibs-url.lua @@ -145,19 +145,25 @@ local splitquery = Cf ( Ct("") * P { "sequence",  -- hasher  local function hashed(str) -- not yet ok (/test?test) -    if str == "" then +    if not str or str == "" then          return {              scheme   = "invalid",              original = str,          }      end -    local s = split(str) -    local rawscheme  = s[1] -    local rawquery   = s[4] -    local somescheme = rawscheme ~= "" -    local somequery  = rawquery  ~= "" +    local detailed   = split(str) +    local rawscheme  = "" +    local rawquery   = "" +    local somescheme = false +    local somequery  = false +    if detailed then +        rawscheme  = detailed[1] +        rawquery   = detailed[4] +        somescheme = rawscheme ~= "" +        somequery  = rawquery  ~= "" +    end      if not somescheme and not somequery then -        s = { +        return {              scheme    = "file",              authority = "",              path      = str, @@ -167,30 +173,33 @@ local function hashed(str) -- not yet ok (/test?test)              noscheme  = true,              filename  = str,          } -    else -- not always a filename but handy anyway -        local authority, path, filename = s[2], s[3] -        if authority == "" then -            filename = path -        elseif path == "" then -            filename = "" -        else -            filename = authority .. "/" .. path -        end -        s = { -            scheme    = rawscheme, -            authority = authority, -            path      = path, -            query     = lpegmatch(unescaper,rawquery),  -- unescaped, but possible conflict with & and = -            queries   = lpegmatch(splitquery,rawquery), -- split first and then unescaped -            fragment  = s[5], -            original  = str, -            noscheme  = false, -            filename  = filename, -        }      end -    return s +    -- not always a filename but handy anyway +    local authority = detailed[2] +    local path      = detailed[3] +    local filename  = nil +    if authority == "" then +        filename = path +    elseif path == "" then +        filename = "" +    else +        filename = authority .. "/" .. path +    end +    return { +        scheme    = rawscheme, +        authority = authority, +        path      = path, +        query     = lpegmatch(unescaper,rawquery),  -- unescaped, but possible conflict with & and = +        queries   = lpegmatch(splitquery,rawquery), -- split first and then unescaped +        fragment  = detailed[5], +        original  = str, +        noscheme  = false, +        filename  = filename, +    }  end +-- inspect(hashed()) +-- inspect(hashed(""))  -- inspect(hashed("template:///test"))  -- inspect(hashed("template:///test++.whatever"))  -- inspect(hashed("template:///test%2B%2B.whatever")) @@ -247,7 +256,7 @@ function url.construct(hash) -- dodo: we need to escape !      return lpegmatch(escaper,concat(fullurl))  end -local pattern = Cs(noslash * R("az","AZ") * (S(":|")/":") * noslash * P(1)^0) +local pattern = Cs(slash^-1/"" * R("az","AZ") * ((S(":|")/":") + P(":")) * slash * P(1)^0)  function url.filename(filename)      local spec = hashed(filename) @@ -257,6 +266,7 @@ end  -- print(url.filename("/c|/test"))  -- print(url.filename("/c/test")) +-- print(url.filename("file:///t:/sources/cow.svg"))  local function escapestring(str)      return lpegmatch(escaper,str) diff --git a/lualibs-util-dim.lua b/lualibs-util-dim.lua index bfffb10..2bdb870 100644 --- a/lualibs-util-dim.lua +++ b/lualibs-util-dim.lua @@ -92,9 +92,9 @@ format (string) is implemented using this table.</p>  --ldx]]--  local f_none = formatters["%s%s"] -local f_true = formatters["%0.5f%s"] +local f_true = formatters["%0.5F%s"] -local function numbertodimen(n,unit,fmt) +local function numbertodimen(n,unit,fmt) -- will be redefined later !      if type(n) == 'string' then          return n      else diff --git a/lualibs-util-prs.lua b/lualibs-util-prs.lua index f51f6fc..a3c1c6f 100644 --- a/lualibs-util-prs.lua +++ b/lualibs-util-prs.lua @@ -21,6 +21,8 @@ parsers.patterns  = patterns  local setmetatableindex = table.setmetatableindex  local sortedhash        = table.sortedhash +local sortedkeys        = table.sortedkeys +local tohash            = table.tohash  -- we share some patterns @@ -94,9 +96,7 @@ patterns.settings_to_hash_b = pattern_b_s  patterns.settings_to_hash_c = pattern_c_s  function parsers.make_settings_to_hash_pattern(set,how) -    if type(str) == "table" then -        return set -    elseif how == "strict" then +    if how == "strict" then          return (pattern_c/set)^1      elseif how == "tolerant" then          return (pattern_b/set)^1 @@ -106,7 +106,9 @@ function parsers.make_settings_to_hash_pattern(set,how)  end  function parsers.settings_to_hash(str,existing) -    if type(str) == "table" then +    if not str or str == "" then +        return { } +    elseif type(str) == "table" then          if existing then              for k, v in next, str do                  existing[k] = v @@ -115,17 +117,17 @@ function parsers.settings_to_hash(str,existing)          else              return str          end -    elseif str and str ~= "" then +    else          hash = existing or { }          lpegmatch(pattern_a_s,str)          return hash -    else -        return { }      end  end  function parsers.settings_to_hash_tolerant(str,existing) -    if type(str) == "table" then +    if not str or str == "" then +        return { } +    elseif type(str) == "table" then          if existing then              for k, v in next, str do                  existing[k] = v @@ -134,17 +136,17 @@ function parsers.settings_to_hash_tolerant(str,existing)          else              return str          end -    elseif str and str ~= "" then +    else          hash = existing or { }          lpegmatch(pattern_b_s,str)          return hash -    else -        return { }      end  end  function parsers.settings_to_hash_strict(str,existing) -    if type(str) == "table" then +    if not str or str == "" then +        return nil +    elseif type(str) == "table" then          if existing then              for k, v in next, str do                  existing[k] = v @@ -157,8 +159,6 @@ function parsers.settings_to_hash_strict(str,existing)          hash = existing or { }          lpegmatch(pattern_c_s,str)          return next(hash) and hash -    else -        return nil      end  end @@ -167,17 +167,17 @@ local value     = P(lbrace * C((nobrace + nestedbraces)^0) * rbrace)                  + C((nestedbraces + (1-comma))^0)  local pattern   = spaces * Ct(value*(separator*value)^0) --- "aap, {noot}, mies" : outer {} removes, leading spaces ignored +-- "aap, {noot}, mies" : outer {} removed, leading spaces ignored  patterns.settings_to_array = pattern  -- we could use a weak table as cache  function parsers.settings_to_array(str,strict) -    if type(str) == "table" then -        return str -    elseif not str or str == "" then +    if not str or str == "" then          return { } +    elseif type(str) == "table" then +        return str      elseif strict then          if find(str,"{",1,true) then              return lpegmatch(pattern,str) @@ -195,12 +195,40 @@ end  --  -- "{123} , 456  " -> "123" "456" -local separator = space^0 * comma * space^0 -local value     = P(lbrace * C((nobrace + nestedbraces)^0) * rbrace) -                + C((nestedbraces + (1-(space^0*(comma+P(-1)))))^0) -local withvalue = Carg(1) * value / function(f,s) return f(s) end -local pattern_a = spaces * Ct(value*(separator*value)^0) -local pattern_b = spaces * withvalue * (separator*withvalue)^0 +-- local separator = space^0 * comma * space^0 +-- local value     = P(lbrace * C((nobrace + nestedbraces)^0) * rbrace) +--                 + C((nestedbraces + (1-(space^0*(comma+P(-1)))))^0) +-- local withvalue = Carg(1) * value / function(f,s) return f(s) end +-- local pattern_a = spaces * Ct(value*(separator*value)^0) +-- local pattern_b = spaces * withvalue * (separator*withvalue)^0 + +local cache_a = { } +local cache_b = { } + +function parsers.groupedsplitat(symbol,withaction) +    if not symbol then +        symbol = "," +    end +    local pattern = (withaction and cache_b or cache_a)[symbol] +    if not pattern then +        local symbols   = S(symbol) +        local separator = space^0 * symbols * space^0 +        local value     = P(lbrace * C((nobrace + nestedbraces)^0) * rbrace) +                        + C((nestedbraces + (1-(space^0*(symbols+P(-1)))))^0) +        if withaction then +            local withvalue = Carg(1) * value / function(f,s) return f(s) end +            pattern = spaces * withvalue * (separator*withvalue)^0 +            cache_b[symbol] = pattern +        else +            pattern = spaces * Ct(value*(separator*value)^0) +            cache_a[symbol] = pattern +        end +    end +    return pattern +end + +local pattern_a = parsers.groupedsplitat(",",false) +local pattern_b = parsers.groupedsplitat(",",true)  function parsers.stripped_settings_to_array(str)      if not str or str == "" then @@ -221,8 +249,6 @@ end  -- parsers.process_stripped_settings("{123} , 456  ",function(s) print("["..s.."]") end)  -- parsers.process_stripped_settings("123 , 456  ",function(s) print("["..s.."]") end) --- -  local function set(t,v)      t[#t+1] = v  end @@ -236,8 +262,8 @@ end  function parsers.hash_to_string(h,separator,yes,no,strict,omit)      if h then -        local t, tn, s = { }, 0, table.sortedkeys(h) -        omit = omit and table.tohash(omit) +        local t, tn, s = { }, 0, sortedkeys(h) +        omit = omit and tohash(omit)          for i=1,#s do              local key = s[i]              if not omit or not omit[key] then @@ -275,15 +301,25 @@ function parsers.array_to_string(a,separator)      end  end -function parsers.settings_to_set(str,t) -- tohash? -- todo: lpeg -- duplicate anyway -    t = t or { } ---  for s in gmatch(str,"%s*([^, ]+)") do -- space added -    for s in gmatch(str,"[^, ]+") do -- space added -        t[s] = true -    end -    return t +-- function parsers.settings_to_set(str,t) -- tohash? -- todo: lpeg -- duplicate anyway +--     if str then +--         t = t or { } +--         for s in gmatch(str,"[^, ]+") do -- space added +--             t[s] = true +--         end +--         return t +--     else +--         return { } +--     end +-- end + +local pattern = Cf(Ct("") * Cg(C((1-S(", "))^1) * S(", ")^0 * Cc(true))^1,rawset) + +function utilities.parsers.settings_to_set(str,t) +    return str and lpegmatch(pattern,str) or { }  end +  function parsers.simple_hash_to_string(h, separator)      local t, tn = { }, 0      for k, v in sortedhash(h) do @@ -297,7 +333,7 @@ end  -- for mtx-context etc: aaaa bbbb cccc=dddd eeee=ffff -local str      = C((1-whitespace-equal)^1) +local str      = Cs(lpegpatterns.unquoted) + C((1-whitespace-equal)^1)  local setting  = Cf( Carg(1) * (whitespace^0 * Cg(str * whitespace^0 * (equal * whitespace^0 * str + Cc(""))))^1,rawset)  local splitter = setting^1 @@ -305,6 +341,12 @@ function utilities.parsers.options_to_hash(str,target)      return str and lpegmatch(splitter,str,1,target or { }) or { }  end +local splitter = lpeg.tsplitat(" ") + +function utilities.parsers.options_to_array(str) +    return str and lpegmatch(splitter,str) or { } +end +  -- for chem (currently one level)  local value     = P(lbrace * C((nobrace + nestedbraces)^0) * rbrace) @@ -436,7 +478,7 @@ local defaultspecification = { separator = ",", quote = '"' }  -- database module  function parsers.csvsplitter(specification) -    specification   = specification and table.setmetatableindex(specification,defaultspecification) or defaultspecification +    specification   = specification and setmetatableindex(specification,defaultspecification) or defaultspecification      local separator = specification.separator      local quotechar = specification.quote      local separator = S(separator ~= "" and separator or ",") @@ -475,7 +517,7 @@ end  -- local list, names = mycsvsplitter(crap)        inspect(list) inspect(names)  function parsers.rfc4180splitter(specification) -    specification     = specification and table.setmetatableindex(specification,defaultspecification) or defaultspecification +    specification     = specification and setmetatableindex(specification,defaultspecification) or defaultspecification      local separator   = specification.separator --> rfc: COMMA      local quotechar   = P(specification.quote)  -->      DQUOTE      local dquotechar  = quotechar * quotechar   -->      2DQUOTE @@ -488,7 +530,7 @@ function parsers.rfc4180splitter(specification)      local field       = escaped + non_escaped + Cc("")      local record      = Ct(field * (separator * field)^1)      local headerline  = record * Cp() -    local wholeblob   = Ct((newline^-1 * record)^0) +    local wholeblob   = Ct((newline^(specification.strict and -1 or 1) * record)^0)      return function(data,getheader)          if getheader then              local header, position = lpegmatch(headerline,data) @@ -560,10 +602,10 @@ end  -- print(utilities.parsers.unittotex("10^-32 %"),utilities.parsers.unittoxml("10^32 %"))  local cache   = { } -local spaces  = lpeg.patterns.space^0 +local spaces  = lpegpatterns.space^0  local dummy   = function() end -table.setmetatableindex(cache,function(t,k) +setmetatableindex(cache,function(t,k)      local separator = P(k)      local value     = (1-separator)^0      local pattern   = spaces * C(value) * separator^0 * Cp() diff --git a/lualibs-util-sta.lua b/lualibs-util-sta.lua index 1a61ec4..27ab5a6 100644 --- a/lualibs-util-sta.lua +++ b/lualibs-util-sta.lua @@ -81,6 +81,8 @@ end  function stacker.new(name) +    local report = logs.reporter("stacker",name or nil) +      local s      local stack = { } @@ -126,8 +128,18 @@ function stacker.new(name)          end      end -    local tops = { } -    local top, switch +    local tops   = { } +    local top    = nil +    local switch = nil + +    local function resolve_reset(mode) +        if #tops > 0 then +            report("resetting %s left-over states of %a",#tops,name) +        end +        tops   = { } +        top    = nil +        switch = nil +    end      local function resolve_begin(mode)          if mode then @@ -206,8 +218,7 @@ function stacker.new(name)      local function resolve_end()       -- resolve_step(s.unset) -        local noftop = #top -        if noftop > 0 then +        if #tops > 0 then -- was #top brrr              local result = s.stop(s,top,1,#top)              remove(tops)              top = tops[#tops] @@ -224,8 +235,6 @@ function stacker.new(name)          resolve_end()      end -    local report = logs.reporter("stacker",name or nil) -      s = {          name          = name or "unknown",          unset         = -1, @@ -240,6 +249,7 @@ function stacker.new(name)          resolve_begin = resolve_begin,          resolve_step  = resolve_step,          resolve_end   = resolve_end, +        resolve_reset = resolve_reset,      }      return s -- we can overload functions diff --git a/lualibs-util-sto.lua b/lualibs-util-sto.lua index 8aafca4..d21267d 100644 --- a/lualibs-util-sto.lua +++ b/lualibs-util-sto.lua @@ -113,22 +113,16 @@ local f_index = {      ["number"] = f_number,  } -local t_index = { -    ["empty"]  = { __index = f_empty  }, -    ["self"]   = { __index = f_self   }, -    ["table"]  = { __index = f_table  }, -    ["number"] = { __index = f_number }, -} -  function table.setmetatableindex(t,f)      if type(t) ~= "table" then          f, t = t, { }      end      local m = getmetatable(t) +    local i = f_index[f] or f      if m then -        m.__index = f_index[f] or f +        m.__index = i      else -        setmetatable(t,t_index[f] or { __index = f }) +        setmetatable(t,{ __index = i })      end      return t  end @@ -137,19 +131,16 @@ local f_index = {      ["ignore"] = f_ignore,  } -local t_index = { -    ["ignore"] = { __newindex = f_ignore }, -} -  function table.setmetatablenewindex(t,f)      if type(t) ~= "table" then          f, t = t, { }      end      local m = getmetatable(t) +    local i = f_index[f] or f      if m then -        m.__newindex = f_index[f] or f +        m.__newindex = i      else -        setmetatable(t,t_index[f] or { __newindex = f }) +        setmetatable(t,{ __newindex = i })      end      return t  end diff --git a/lualibs-util-str.lua b/lualibs-util-str.lua index 2739a20..95534c8 100644 --- a/lualibs-util-str.lua +++ b/lualibs-util-str.lua @@ -6,7 +6,7 @@ if not modules then modules = { } end modules ['util-str'] = {      license   = "see context related readme files"  } -utilities         = utilities or {} +utilities         = utilities or { }  utilities.strings = utilities.strings or { }  local strings     = utilities.strings @@ -44,7 +44,12 @@ end  if not number then number = { } end -- temp hack for luatex-fonts -local stripper = patterns.stripzeros +local stripper    = patterns.stripzeros +local newline     = patterns.newline +local endofstring = patterns.endofstring +local whitespace  = patterns.whitespace +local spacer      = patterns.spacer +local spaceortab  = patterns.spaceortab  local function points(n)      n = tonumber(n) @@ -62,12 +67,12 @@ number.basepoints = basepoints  -- str = " \n \ntest  \n test\ntest "  -- print("["..string.gsub(string.collapsecrlf(str),"\n","+").."]") -local rubish     = patterns.spaceortab^0 * patterns.newline -local anyrubish  = patterns.spaceortab + patterns.newline +local rubish     = spaceortab^0 * newline +local anyrubish  = spaceortab + newline  local anything   = patterns.anything -local stripped   = (patterns.spaceortab^1 / "") * patterns.newline +local stripped   = (spaceortab^1 / "") * newline  local leading    = rubish^0 / "" -local trailing   = (anyrubish^1 * patterns.endofstring) / "" +local trailing   = (anyrubish^1 * endofstring) / ""  local redundant  = rubish^3 / "\n"  local pattern = Cs(leading * (trailing + redundant + stripped + anything)^0) @@ -129,7 +134,7 @@ local pattern =                return ""            end        end -    + patterns.newline * Cp() / function(position) +    + newline * Cp() / function(position)            extra, start = 0, position        end      + patterns.anything @@ -162,11 +167,6 @@ end  --     return str  -- end -local newline     = patterns.newline -local endofstring = patterns.endofstring -local whitespace  = patterns.whitespace -local spacer      = patterns.spacer -  local space       = spacer^0  local nospace     = space/""  local endofline   = nospace * newline @@ -219,10 +219,12 @@ local striplinepatterns = {      ["collapse"]            = patterns.collapser, -- how about: stripper fullstripper  } +setmetatable(striplinepatterns,{ __index = function(t,k) return p_prune_collapse end }) +  strings.striplinepatterns = striplinepatterns  function strings.striplines(str,how) -    return str and lpegmatch(how and striplinepatterns[how] or p_prune_collapse,str) or str +    return str and lpegmatch(striplinepatterns[how],str) or str  end  -- also see: string.collapsespaces @@ -352,17 +354,26 @@ function string.autosingle(s,sep)      return ("'" .. tostring(s) .. "'")  end -local tracedchars  = { } +local tracedchars  = { [0] = +    -- the regular bunch +    "[null]", "[soh]", "[stx]", "[etx]", "[eot]", "[enq]", "[ack]", "[bel]", +    "[bs]",   "[ht]",  "[lf]",  "[vt]",  "[ff]",  "[cr]",  "[so]",  "[si]", +    "[dle]",  "[dc1]", "[dc2]", "[dc3]", "[dc4]", "[nak]", "[syn]", "[etb]", +    "[can]",  "[em]",  "[sub]", "[esc]", "[fs]",  "[gs]",  "[rs]",  "[us]", +    -- plus space +    "[space]", -- 0x20 +} +  string.tracedchars = tracedchars  strings.tracers    = tracedchars  function string.tracedchar(b)      -- todo: table      if type(b) == "number" then -        return tracedchars[b] or (utfchar(b) .. " (U+" .. format('%05X',b) .. ")") +        return tracedchars[b] or (utfchar(b) .. " (U+" .. format("%05X",b) .. ")")      else          local c = utfbyte(b) -        return tracedchars[c] or (b .. " (U+" .. format('%05X',c) .. ")") +        return tracedchars[c] or (b .. " (U+" .. (c and format("%05X",c) or "?????") .. ")")      end  end @@ -537,7 +548,7 @@ end  -- We could probably use just %s with integers but who knows what Lua 5.3 will do? So let's  -- for the moment use %i. -local format_F = function() -- beware, no cast to number +local format_F = function(f) -- beware, no cast to number      n = n + 1      if not f or f == "" then          return format("(((a%s > -0.0000000005 and a%s < 0.0000000005) and '0') or format((a%s %% 1 == 0) and '%%i' or '%%.9f',a%s))",n,n,n,n) @@ -842,7 +853,7 @@ local builder = Cs { "start",                + V("m") + V("M") -- new                + V("z") -- new                -- -              + V("*") -- ignores probably messed up % +           -- + V("?") -- ignores probably messed up %              )            + V("*")          ) @@ -897,6 +908,7 @@ local builder = Cs { "start",      ["A"] = (prefix_any * P("A")) / format_A, -- %A => "..." (forces tostring)      --      ["*"] = Cs(((1-P("%"))^1 + P("%%")/"%%")^1) / format_rest, -- rest (including %%) +    ["?"] = Cs(((1-P("%"))^1               )^1) / format_rest, -- rest (including %%)      --      ["!"] = Carg(2) * prefix_any * P("!") * C((1-P("!"))^1) * P("!") / format_extension,  } @@ -1114,3 +1126,9 @@ local pattern =  function string.optionalquoted(str)      return lpegmatch(pattern,str) or str  end + +local pattern = Cs((newline / (os.newline or "\r") + 1)^0) + +function string.replacenewlines(str) +    return lpegmatch(pattern,str) +end diff --git a/lualibs-util-tab.lua b/lualibs-util-tab.lua index f9e9b31..d6f3d67 100644 --- a/lualibs-util-tab.lua +++ b/lualibs-util-tab.lua @@ -11,7 +11,7 @@ utilities.tables = utilities.tables or { }  local tables     = utilities.tables  local format, gmatch, gsub, sub = string.format, string.gmatch, string.gsub, string.sub -local concat, insert, remove = table.concat, table.insert, table.remove +local concat, insert, remove, sort = table.concat, table.insert, table.remove, table.sort  local setmetatable, getmetatable, tonumber, tostring = setmetatable, getmetatable, tonumber, tostring  local type, next, rawset, tonumber, tostring, load, select = type, next, rawset, tonumber, tostring, load, select  local lpegmatch, P, Cs, Cc = lpeg.match, lpeg.P, lpeg.Cs, lpeg.Cc @@ -98,6 +98,17 @@ function tables.removevalue(t,value) -- todo: n      end  end +function tables.replacevalue(t,oldvalue,newvalue) +    if oldvalue and newvalue then +        for i=1,#t do +            if t[i] == oldvalue then +                t[i] = newvalue +                -- replace all, so no: return +            end +        end +    end +end +  function tables.insertbeforevalue(t,value,extra)      for i=1,#t do          if t[i] == extra then @@ -534,6 +545,7 @@ local f_val_str           = formatters["%w%q,"]  local f_val_boo           = formatters["%w%l,"]  local f_val_not           = formatters["%w{},"]  local f_val_seq           = formatters["%w{ %, t },"] +local f_fin_seq           = formatters[" %, t }"]  local f_table_return      = formatters["return {"]  local f_table_name        = formatters["%s={"] @@ -545,23 +557,58 @@ local f_table_finish      = formatters["}"]  local spaces = utilities.strings.newrepeater(" ") -local serialize = table.serialize -- the extensive one, the one we started with +local original_serialize = table.serialize -- the extensive one, the one we started with  -- there is still room for optimization: index run, key run, but i need to check with the  -- latest lua for the value of #n (with holes) .. anyway for tracing purposes we want  -- indices / keys being sorted, so it will never be real fast -function table.serialize(root,name,specification) +local function serialize(root,name,specification)      if type(specification) == "table" then -        return serialize(root,name,specification) -- the original one +        return original_serialize(root,name,specification) -- the original one      end -    local t -- = { } -    local n = 1 +    local t    -- = { } +    local n       = 1 +    local unknown = false + +--     local function simple_table(t) +--         local ts = #t +--         if ts > 0 then +--             local n = 0 +--             for _, v in next, t do +--                 n = n + 1 +--                 if type(v) == "table" then +--                     return nil +--                 end +--             end +--             if n == ts then +--                 local tt = { } +--                 local nt = 0 +--                 for i=1,ts do +--                     local v = t[i] +--                     local tv = type(v) +--                     nt = nt + 1 +--                     if tv == "number" then +--                         tt[nt] = v +--                     elseif tv == "string" then +--                         tt[nt] = format("%q",v) -- f_string(v) +--                     elseif tv == "boolean" then +--                         tt[nt] = v and "true" or "false" +--                     else +--                         return nil +--                     end +--                 end +--                 return tt +--             end +--         end +--         return nil +--     end      local function simple_table(t) -        if #t > 0 then +        local nt = #t +        if nt > 0 then              local n = 0              for _, v in next, t do                  n = n + 1 @@ -569,24 +616,40 @@ function table.serialize(root,name,specification)                      return nil                  end              end -            if n == #t then +            local haszero = t[0] +            if n == nt then                  local tt = { } -                local nt = 0 -                for i=1,#t do +                for i=1,nt do                      local v = t[i]                      local tv = type(v) -                    nt = nt + 1                      if tv == "number" then -                        tt[nt] = v +                        tt[i] = v -- not needed tostring(v)                      elseif tv == "string" then -                        tt[nt] = format("%q",v) -- f_string(v) +                        tt[i] = format("%q",v) -- f_string(v)                      elseif tv == "boolean" then -                        tt[nt] = v and "true" or "false" +                        tt[i] = v and "true" or "false"                      else                          return nil                      end                  end                  return tt +            elseif haszero and (n == nt + 1) then +                local tt = { } +                for i=0,nt do +                    local v = t[i] +                    local tv = type(v) +                    if tv == "number" then +                        tt[i+1] = v -- not needed tostring(v) +                    elseif tv == "string" then +                        tt[i+1] = format("%q",v) -- f_string(v) +                    elseif tv == "boolean" then +                        tt[i+1] = v and "true" or "false" +                    else +                        return nil +                    end +                end +                tt[1] = "[0] = " .. tt[1] +                return tt              end          end          return nil @@ -612,7 +675,7 @@ function table.serialize(root,name,specification)              depth = depth + 1          end          -- we could check for k (index) being number (cardinal) -        if root and next(root) then +        if root and next(root) ~= nil then              local first = nil              local last  = 0              last = #root @@ -625,19 +688,19 @@ function table.serialize(root,name,specification)              if last > 0 then                  first = 1              end -            local sk = sortedkeys(root) -- inline fast version? +            local sk = sortedkeys(root) -- inline fast version?\              for i=1,#sk do                  local k  = sk[i]                  local v  = root[k]                  local tv = type(v)                  local tk = type(k) -                if first and tk == "number" and k >= first and k <= last then +                if first and tk == "number" and k <= last and k >= first then                      if tv == "number" then                          n = n + 1 t[n] = f_val_num(depth,v)                      elseif tv == "string" then                          n = n + 1 t[n] = f_val_str(depth,v)                      elseif tv == "table" then -                        if not next(v) then +                        if next(v) == nil then -- tricky as next is unpredictable in a hash                              n = n + 1 t[n] = f_val_not(depth)                          else                              local st = simple_table(v) @@ -649,6 +712,8 @@ function table.serialize(root,name,specification)                          end                      elseif tv == "boolean" then                          n = n + 1 t[n] = f_val_boo(depth,v) +                    elseif unknown then +                        n = n + 1 t[n] = f_val_str(depth,tostring(v))                      end                  elseif tv == "number" then                      if tk == "number" then @@ -657,6 +722,8 @@ function table.serialize(root,name,specification)                          n = n + 1 t[n] = f_key_str_value_num(depth,k,v)                      elseif tk == "boolean" then                          n = n + 1 t[n] = f_key_boo_value_num(depth,k,v) +                    elseif unknown then +                        n = n + 1 t[n] = f_key_str_value_num(depth,tostring(k),v)                      end                  elseif tv == "string" then                      if tk == "number" then @@ -665,15 +732,19 @@ function table.serialize(root,name,specification)                          n = n + 1 t[n] = f_key_str_value_str(depth,k,v)                      elseif tk == "boolean" then                          n = n + 1 t[n] = f_key_boo_value_str(depth,k,v) +                    elseif unknown then +                        n = n + 1 t[n] = f_key_str_value_str(depth,tostring(k),v)                      end                  elseif tv == "table" then -                    if not next(v) then +                    if next(v) == nil then                          if tk == "number" then -                            n = n + 1 t[n] = f_key_num_value_not(depth,k,v) +                            n = n + 1 t[n] = f_key_num_value_not(depth,k)                          elseif tk == "string" then -                            n = n + 1 t[n] = f_key_str_value_not(depth,k,v) +                            n = n + 1 t[n] = f_key_str_value_not(depth,k)                          elseif tk == "boolean" then -                            n = n + 1 t[n] = f_key_boo_value_not(depth,k,v) +                            n = n + 1 t[n] = f_key_boo_value_not(depth,k) +                        elseif unknown then +                            n = n + 1 t[n] = f_key_str_value_not(depth,tostring(k))                          end                      else                          local st = simple_table(v) @@ -685,6 +756,8 @@ function table.serialize(root,name,specification)                              n = n + 1 t[n] = f_key_str_value_seq(depth,k,st)                          elseif tk == "boolean" then                              n = n + 1 t[n] = f_key_boo_value_seq(depth,k,st) +                        elseif unknown then +                            n = n + 1 t[n] = f_key_str_value_seq(depth,tostring(k),st)                          end                      end                  elseif tv == "boolean" then @@ -694,6 +767,18 @@ function table.serialize(root,name,specification)                          n = n + 1 t[n] = f_key_str_value_boo(depth,k,v)                      elseif tk == "boolean" then                          n = n + 1 t[n] = f_key_boo_value_boo(depth,k,v) +                    elseif unknown then +                        n = n + 1 t[n] = f_key_str_value_boo(depth,tostring(k),v) +                    end +                else +                    if tk == "number" then +                        n = n + 1 t[n] = f_key_num_value_str(depth,k,tostring(v)) +                    elseif tk == "string" then +                        n = n + 1 t[n] = f_key_str_value_str(depth,k,tostring(v)) +                    elseif tk == "boolean" then +                        n = n + 1 t[n] = f_key_boo_value_str(depth,k,tostring(v)) +                    elseif unknown then +                        n = n + 1 t[n] = f_key_str_value_str(depth,tostring(k),tostring(v))                      end                  end              end @@ -731,11 +816,22 @@ function table.serialize(root,name,specification)              root._w_h_a_t_e_v_e_r_ = nil          end          -- Let's forget about empty tables. -        if next(root) then -            do_serialize(root,name,1,0) +        if next(root) ~= nil then +            local st = simple_table(root) +            if st then +                return t[1] .. f_fin_seq(st) -- todo: move up and in one go +            else +                do_serialize(root,name,1,0) +            end          end      end      n = n + 1      t[n] = f_table_finish()      return concat(t,"\n")  end + +table.serialize = serialize + +if setinspector then +    setinspector("table",function(v) if type(v) == "table" then print(serialize(v,"table")) return true end end) +end diff --git a/lualibs-util-tpl.lua b/lualibs-util-tpl.lua index 67d0582..468dd42 100644 --- a/lualibs-util-tpl.lua +++ b/lualibs-util-tpl.lua @@ -52,7 +52,7 @@ local sqlescape = lpeg.replacer {   -- { "\t",   "\\t"  },  } -local sqlquoted = lpeg.Cs(lpeg.Cc("'") * sqlescape * lpeg.Cc("'")) +local sqlquoted = Cs(Cc("'") * sqlescape * Cc("'"))  lpegpatterns.sqlescape = sqlescape  lpegpatterns.sqlquoted = sqlquoted @@ -111,13 +111,26 @@ local luaescaper       = escapers.lua  local quotedluaescaper = quotedescapers.lua  local function replacekeyunquoted(s,t,how,recurse) -- ".. \" " -    local escaper = how and escapers[how] or luaescaper -    return escaper(replacekey(s,t,how,recurse)) +    if how == false then +        return replacekey(s,t,how,recurse) +    else +        local escaper = how and escapers[how] or luaescaper +        return escaper(replacekey(s,t,how,recurse)) +    end  end  local function replacekeyquoted(s,t,how,recurse) -- ".. \" " -    local escaper = how and quotedescapers[how] or quotedluaescaper -    return escaper(replacekey(s,t,how,recurse)) +    if how == false then +        return replacekey(s,t,how,recurse) +    else +        local escaper = how and quotedescapers[how] or quotedluaescaper +        return escaper(replacekey(s,t,how,recurse)) +    end +end + +local function replaceoptional(l,m,r,t,how,recurse) +    local v = t[l] +    return v and v ~= "" and lpegmatch(replacer,r,1,t,how or "lua",recurse or false) or ""  end  local single      = P("%")  -- test %test% test     : resolves test @@ -135,12 +148,19 @@ local norquoted   = rquoted  / ''  local nolquotedq  = lquotedq / ''  local norquotedq  = rquotedq / '' -local key         = nosingle   * ((C((1-nosingle  )^1) * Carg(1) * Carg(2) * Carg(3)) / replacekey        ) * nosingle -local quoted      = nolquotedq * ((C((1-norquotedq)^1) * Carg(1) * Carg(2) * Carg(3)) / replacekeyquoted  ) * norquotedq -local unquoted    = nolquoted  * ((C((1-norquoted )^1) * Carg(1) * Carg(2) * Carg(3)) / replacekeyunquoted) * norquoted +local noloptional = P("%?") / '' +local noroptional = P("?%") / '' +local nomoptional = P(":")  / '' + + +local args        = Carg(1) * Carg(2) * Carg(3) +local key         = nosingle    * ((C((1-nosingle   )^1) * args) / replacekey        ) * nosingle +local quoted      = nolquotedq  * ((C((1-norquotedq )^1) * args) / replacekeyquoted  ) * norquotedq +local unquoted    = nolquoted   * ((C((1-norquoted  )^1) * args) / replacekeyunquoted) * norquoted +local optional    = noloptional * ((C((1-nomoptional)^1) * nomoptional * C((1-noroptional)^1) * args) / replaceoptional) *  noroptional  local any         = P(1) -      replacer    = Cs((unquoted + quoted + escape + key + any)^0) +      replacer    = Cs((unquoted + quoted + escape + optional + key + any)^0)  local function replace(str,mapping,how,recurse)      if mapping and str then @@ -156,6 +176,7 @@ end  -- print(replace("test '%[x]%' test",{ x = [[a '%y%'  a]], y = "oeps" },'sql',true))  -- print(replace([[test %[x]% test]],{ x = [[a "x"  a]]}))  -- print(replace([[test %(x)% test]],{ x = [[a "x"  a]]})) +-- print(replace([[convert %?x: -x "%x%" ?% %?y: -y "%y%" ?%]],{ x = "yes" }))  templates.replace = replace @@ -188,3 +209,5 @@ end  -- inspect(utilities.templates.replace("test %one% test", { one = "%two%", two = "two" }))  -- inspect(utilities.templates.resolve({ one = "%two%", two = "two", three = "%three%" })) +-- inspect(utilities.templates.replace("test %one% test", { one = "%two%", two = "two" },false,true)) +-- inspect(utilities.templates.replace("test %one% test", { one = "%two%", two = "two" },false)) diff --git a/lualibs.dtx b/lualibs.dtx index 8d43a34..d1cee61 100644 --- a/lualibs.dtx +++ b/lualibs.dtx @@ -149,7 +149,7 @@ and the derived file lualibs.lua.  \definehighlight    [fileent][\ttfamily\restoreunderscore]         %% files, dirs  \definehighlight   [texmacro][\sffamily\itshape\textbackslash]     %% cs -\definehighlight[luafunction][\sffamily\itshape\restoreunderscore] %% lua identifiers +\definehighlight   [luaident][\sffamily\itshape\restoreunderscore] %% lua identifiers  \definehighlight [identifier][\sffamily]                           %% names  \definehighlight     [abbrev][\rmfamily\scshape]                   %% acronyms  \definehighlight   [emphasis][\rmfamily\slshape]                   %% level 1 emph @@ -246,7 +246,7 @@ and the derived file lualibs.lua.  %   \CONTEXT loads with every run.  % }  % the \identifier{lualibs} package can skip loading of the latter on demand. -% The \luafunction{config} table needs to be present prior to loading the +% The \luaident{config} table needs to be present prior to loading the  % package for this to work (in the future this may be achieved by an option of  % \texmacro{usepackage}) for \LATEX users).  % In the \verb|lualibs| field, set \verb|load_extended| to false: @@ -282,7 +282,7 @@ and the derived file lualibs.lua.  %  % \noindent  % If your code is run by the \fileent{texlua} intepreter, you will need to -% initialize \luafunction{kpse} library so that |require()| can find files +% initialize \luaident{kpse} library so that |require()| can find files  % under \abbrev{texmf} tree: |kpse.set_program_name("luatex")|.  %  % \section{Files} @@ -363,11 +363,14 @@ and the derived file lualibs.lua.  % This is achieved by means of \identifier{mtx-package}, a script for bundling  % \LUA code shipped with \CONTEXT.  % This concatenates the code of several \LUA files into a single file that is -% both easier to distribute and loading (marginally) faster. +% both easier to distribute and loading marginally faster.  % \identifier{mtx-package} ensures that the code from each file gets its  % own closure and strips newlines and comments, resulting in a smaller payload.  % Another package that relies on it heavily is the font loader as contained in  % \identifier{luaotfload} and \identifier{luatex-fonts}. +% \identifier{Luaotfload}, a port of the \CONTEXT fontloader for Plain and +% \LATEX, has a hard dependency on the functionality provided by the % +% \identifier{Lualibs} package. The packages should not be updated independently.  %  % If \CONTEXT is installed on the system, the merge files can be created  % by running:  | 
