diff options
Diffstat (limited to 'src/fontloader/misc/fontloader-util-str.lua')
-rw-r--r-- | src/fontloader/misc/fontloader-util-str.lua | 1119 |
1 files changed, 1119 insertions, 0 deletions
diff --git a/src/fontloader/misc/fontloader-util-str.lua b/src/fontloader/misc/fontloader-util-str.lua new file mode 100644 index 0000000..a040b01 --- /dev/null +++ b/src/fontloader/misc/fontloader-util-str.lua @@ -0,0 +1,1119 @@ +if not modules then modules = { } end modules ['util-str'] = { + version = 1.001, + comment = "companion to luat-lib.mkiv", + author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", + copyright = "PRAGMA ADE / ConTeXt Development Team", + license = "see context related readme files" +} + +utilities = utilities or {} +utilities.strings = utilities.strings or { } +local strings = utilities.strings + +local format, gsub, rep, sub = string.format, string.gsub, string.rep, string.sub +local load, dump = load, string.dump +local tonumber, type, tostring = tonumber, type, tostring +local unpack, concat = table.unpack, table.concat +local P, V, C, S, R, Ct, Cs, Cp, Carg, Cc = lpeg.P, lpeg.V, lpeg.C, lpeg.S, lpeg.R, lpeg.Ct, lpeg.Cs, lpeg.Cp, lpeg.Carg, lpeg.Cc +local patterns, lpegmatch = lpeg.patterns, lpeg.match +local utfchar, utfbyte = utf.char, utf.byte +----- loadstripped = utilities.lua.loadstripped +----- setmetatableindex = table.setmetatableindex + +local loadstripped = nil + +if _LUAVERSION < 5.2 then + + loadstripped = function(str,shortcuts) + return load(str) + end + +else + + loadstripped = function(str,shortcuts) + if shortcuts then + return load(dump(load(str),true),nil,nil,shortcuts) + else + return load(dump(load(str),true)) + end + end + +end + +-- todo: make a special namespace for the formatter + +if not number then number = { } end -- temp hack for luatex-fonts + +local stripper = patterns.stripzeros + +local function points(n) + n = tonumber(n) + return (not n or n == 0) and "0pt" or lpegmatch(stripper,format("%.5fpt",n/65536)) +end + +local function basepoints(n) + n = tonumber(n) + return (not n or n == 0) and "0bp" or lpegmatch(stripper,format("%.5fbp", n*(7200/7227)/65536)) +end + +number.points = points +number.basepoints = basepoints + +-- str = " \n \ntest \n test\ntest " +-- print("["..string.gsub(string.collapsecrlf(str),"\n","+").."]") + +local rubish = patterns.spaceortab^0 * patterns.newline +local anyrubish = patterns.spaceortab + patterns.newline +local anything = patterns.anything +local stripped = (patterns.spaceortab^1 / "") * patterns.newline +local leading = rubish^0 / "" +local trailing = (anyrubish^1 * patterns.endofstring) / "" +local redundant = rubish^3 / "\n" + +local pattern = Cs(leading * (trailing + redundant + stripped + anything)^0) + +function strings.collapsecrlf(str) + return lpegmatch(pattern,str) +end + +-- The following functions might end up in another namespace. + +local repeaters = { } -- watch how we also moved the -1 in depth-1 to the creator + +function strings.newrepeater(str,offset) + offset = offset or 0 + local s = repeaters[str] + if not s then + s = { } + repeaters[str] = s + end + local t = s[offset] + if t then + return t + end + t = { } + setmetatable(t, { __index = function(t,k) + if not k then + return "" + end + local n = k + offset + local s = n > 0 and rep(str,n) or "" + t[k] = s + return s + end }) + s[offset] = t + return t +end + +-- local dashes = strings.newrepeater("--",-1) +-- print(dashes[2],dashes[3],dashes[1]) + +local extra, tab, start = 0, 0, 4, 0 + +local nspaces = strings.newrepeater(" ") + +string.nspaces = nspaces + +local pattern = + Carg(1) / function(t) + extra, tab, start = 0, t or 7, 1 + end + * Cs(( + Cp() * patterns.tab / function(position) + local current = (position - start + 1) + extra + local spaces = tab-(current-1) % tab + if spaces > 0 then + extra = extra + spaces - 1 + return nspaces[spaces] -- rep(" ",spaces) + else + return "" + end + end + + patterns.newline * Cp() / function(position) + extra, start = 0, position + end + + patterns.anything + )^1) + +function strings.tabtospace(str,tab) + return lpegmatch(pattern,str,1,tab or 7) +end + +-- local t = { +-- "1234567123456712345671234567", +-- "\tb\tc", +-- "a\tb\tc", +-- "aa\tbb\tcc", +-- "aaa\tbbb\tccc", +-- "aaaa\tbbbb\tcccc", +-- "aaaaa\tbbbbb\tccccc", +-- "aaaaaa\tbbbbbb\tcccccc\n aaaaaa\tbbbbbb\tcccccc", +-- "one\n two\nxxx three\nxx four\nx five\nsix", +-- } +-- for k=1,#t do +-- print(strings.tabtospace(t[k])) +-- end + +-- todo: lpeg + +-- function strings.striplong(str) -- strips all leading spaces +-- str = gsub(str,"^%s*","") +-- str = gsub(str,"[\n\r]+ *","\n") +-- return str +-- end + +local newline = patterns.newline +local endofstring = patterns.endofstring +local whitespace = patterns.whitespace +local spacer = patterns.spacer + +local space = spacer^0 +local nospace = space/"" +local endofline = nospace * newline + +local stripend = (whitespace^1 * endofstring)/"" + +local normalline = (nospace * ((1-space*(newline+endofstring))^1) * nospace) + +local stripempty = endofline^1/"" +local normalempty = endofline^1 +local singleempty = endofline * (endofline^0/"") +local doubleempty = endofline * endofline^-1 * (endofline^0/"") + +local stripstart = stripempty^0 + +local p_prune_normal = Cs ( stripstart * ( stripend + normalline + normalempty )^0 ) +local p_prune_collapse = Cs ( stripstart * ( stripend + normalline + doubleempty )^0 ) +local p_prune_noempty = Cs ( stripstart * ( stripend + normalline + singleempty )^0 ) +local p_retain_normal = Cs ( ( normalline + normalempty )^0 ) +local p_retain_collapse = Cs ( ( normalline + doubleempty )^0 ) +local p_retain_noempty = Cs ( ( normalline + singleempty )^0 ) + +-- function striplines(str,prune,collapse,noempty) +-- if prune then +-- if noempty then +-- return lpegmatch(p_prune_noempty,str) or str +-- elseif collapse then +-- return lpegmatch(p_prune_collapse,str) or str +-- else +-- return lpegmatch(p_prune_normal,str) or str +-- end +-- else +-- if noempty then +-- return lpegmatch(p_retain_noempty,str) or str +-- elseif collapse then +-- return lpegmatch(p_retain_collapse,str) or str +-- else +-- return lpegmatch(p_retain_normal,str) or str +-- end +-- end +-- end + +local striplinepatterns = { + ["prune"] = p_prune_normal, + ["prune and collapse"] = p_prune_collapse, -- default + ["prune and no empty"] = p_prune_noempty, + ["retain"] = p_retain_normal, + ["retain and collapse"] = p_retain_collapse, + ["retain and no empty"] = p_retain_noempty, + ["collapse"] = patterns.collapser, -- how about: stripper fullstripper +} + +setmetatable(striplinepatterns,{ __index = function(t,k) return p_prune_collapse end }) + +strings.striplinepatterns = striplinepatterns + +function strings.striplines(str,how) + return str and lpegmatch(striplinepatterns[how],str) or str +end + +-- also see: string.collapsespaces + +strings.striplong = strings.striplines -- for old times sake + +-- local str = table.concat( { +-- " ", +-- " aap", +-- " noot mies", +-- " ", +-- " ", +-- " zus wim jet", +-- "zus wim jet", +-- " zus wim jet", +-- " ", +-- }, "\n") + +-- local str = table.concat( { +-- " aaaa", +-- " bb", +-- " cccccc", +-- }, "\n") + +-- for k, v in table.sortedhash(utilities.strings.striplinepatterns) do +-- logs.report("stripper","method: %s, result: [[%s]]",k,utilities.strings.striplines(str,k)) +-- end + +-- inspect(strings.striplong([[ +-- aaaa +-- bb +-- cccccc +-- ]])) + +function strings.nice(str) + str = gsub(str,"[:%-+_]+"," ") -- maybe more + return str +end + +-- Work in progress. Interesting is that compared to the built-in this is faster in +-- luatex than in luajittex where we have a comparable speed. It only makes sense +-- to use the formatter when a (somewhat) complex format is used a lot. Each formatter +-- is a function so there is some overhead and not all formatted output is worth that +-- overhead. Keep in mind that there is an extra function call involved. In principle +-- we end up with a string concatination so one could inline such a sequence but often +-- at the cost of less readabinity. So, it's a sort of (visual) compromise. Of course +-- there is the benefit of more variants. (Concerning the speed: a simple format like +-- %05fpt is better off with format than with a formatter, but as soon as you put +-- something in front formatters become faster. Passing the pt as extra argument makes +-- formatters behave better. Of course this is rather implementation dependent. Also, +-- when a specific format is only used a few times the overhead in creating it is not +-- compensated by speed.) +-- +-- More info can be found in cld-mkiv.pdf so here I stick to a simple list. +-- +-- integer %...i number +-- integer %...d number +-- unsigned %...u number +-- character %...c number +-- hexadecimal %...x number +-- HEXADECIMAL %...X number +-- octal %...o number +-- string %...s string number +-- float %...f number +-- checked float %...F number +-- exponential %...e number +-- exponential %...E number +-- autofloat %...g number +-- autofloat %...G number +-- utf character %...c number +-- force tostring %...S any +-- force tostring %Q any +-- force tonumber %N number (strip leading zeros) +-- signed number %I number +-- rounded number %r number +-- 0xhexadecimal %...h character number +-- 0xHEXADECIMAL %...H character number +-- U+hexadecimal %...u character number +-- U+HEXADECIMAL %...U character number +-- points %p number (scaled points) +-- basepoints %b number (scaled points) +-- table concat %...t table +-- table concat %{.}t table +-- serialize %...T sequenced (no nested tables) +-- serialize %{.}T sequenced (no nested tables) +-- boolean (logic) %l boolean +-- BOOLEAN %L boolean +-- whitespace %...w +-- automatic %...a 'whatever' (string, table, ...) +-- automatic %...A "whatever" (string, table, ...) + +local n = 0 + +-- we are somewhat sloppy in parsing prefixes as it's not that critical + +-- hard to avoid but we can collect them in a private namespace if needed + +-- inline the next two makes no sense as we only use this in logging + +local sequenced = table.sequenced + +function string.autodouble(s,sep) + if s == nil then + return '""' + end + local t = type(s) + if t == "number" then + return tostring(s) -- tostring not really needed + end + if t == "table" then + return ('"' .. sequenced(s,sep or ",") .. '"') + end + return ('"' .. tostring(s) .. '"') +end + +function string.autosingle(s,sep) + if s == nil then + return "''" + end + local t = type(s) + if t == "number" then + return tostring(s) -- tostring not really needed + end + if t == "table" then + return ("'" .. sequenced(s,sep or ",") .. "'") + end + return ("'" .. tostring(s) .. "'") +end + +local tracedchars = { } +string.tracedchars = tracedchars +strings.tracers = tracedchars + +function string.tracedchar(b) + -- todo: table + if type(b) == "number" then + return tracedchars[b] or (utfchar(b) .. " (U+" .. format('%05X',b) .. ")") + else + local c = utfbyte(b) + return tracedchars[c] or (b .. " (U+" .. format('%05X',c) .. ")") + end +end + +function number.signed(i) + if i > 0 then + return "+", i + else + return "-", -i + end +end + +local zero = P("0")^1 / "" +local plus = P("+") / "" +local minus = P("-") +local separator = S(".") +local digit = R("09") +local trailing = zero^1 * #S("eE") +local exponent = (S("eE") * (plus + Cs((minus * zero^0 * P(-1))/"") + minus) * zero^0 * (P(-1) * Cc("0") + P(1)^1)) +local pattern_a = Cs(minus^0 * digit^1 * (separator/"" * trailing + separator * (trailing + digit)^0) * exponent) +local pattern_b = Cs((exponent + P(1))^0) + +function number.sparseexponent(f,n) + if not n then + n = f + f = "%e" + end + local tn = type(n) + if tn == "string" then -- cast to number + local m = tonumber(n) + if m then + return lpegmatch((f == "%e" or f == "%E") and pattern_a or pattern_b,format(f,m)) + end + elseif tn == "number" then + return lpegmatch((f == "%e" or f == "%E") and pattern_a or pattern_b,format(f,n)) + end + return tostring(n) +end + +local template = [[ +%s +%s +return function(%s) return %s end +]] + +local preamble, environment = "", { } + +if _LUAVERSION < 5.2 then + + preamble = [[ +local lpeg=lpeg +local type=type +local tostring=tostring +local tonumber=tonumber +local format=string.format +local concat=table.concat +local signed=number.signed +local points=number.points +local basepoints= number.basepoints +local utfchar=utf.char +local utfbyte=utf.byte +local lpegmatch=lpeg.match +local nspaces=string.nspaces +local tracedchar=string.tracedchar +local autosingle=string.autosingle +local autodouble=string.autodouble +local sequenced=table.sequenced +local formattednumber=number.formatted +local sparseexponent=number.sparseexponent + ]] + +else + + environment = { + global = global or _G, + lpeg = lpeg, + type = type, + tostring = tostring, + tonumber = tonumber, + format = string.format, + concat = table.concat, + signed = number.signed, + points = number.points, + basepoints = number.basepoints, + utfchar = utf.char, + utfbyte = utf.byte, + lpegmatch = lpeg.match, + nspaces = string.nspaces, + tracedchar = string.tracedchar, + autosingle = string.autosingle, + autodouble = string.autodouble, + sequenced = table.sequenced, + formattednumber = number.formatted, + sparseexponent = number.sparseexponent, + } + +end + +-- -- -- + +local arguments = { "a1" } -- faster than previously used (select(n,...)) + +setmetatable(arguments, { __index = + function(t,k) + local v = t[k-1] .. ",a" .. k + t[k] = v + return v + end +}) + +local prefix_any = C((S("+- .") + R("09"))^0) +local prefix_tab = P("{") * C((1-P("}"))^0) * P("}") + C((1-R("az","AZ","09","%%"))^0) + +-- we've split all cases as then we can optimize them (let's omit the fuzzy u) + +-- todo: replace outer formats in next by .. + +local format_s = function(f) + n = n + 1 + if f and f ~= "" then + return format("format('%%%ss',a%s)",f,n) + else -- best no tostring in order to stay compatible (.. does a selective tostring too) + return format("(a%s or '')",n) -- goodie: nil check + end +end + +local format_S = function(f) -- can be optimized + n = n + 1 + if f and f ~= "" then + return format("format('%%%ss',tostring(a%s))",f,n) + else + return format("tostring(a%s)",n) + end +end + +local format_q = function() + n = n + 1 + return format("(a%s and format('%%q',a%s) or '')",n,n) -- goodie: nil check (maybe separate lpeg, not faster) +end + +local format_Q = function() -- can be optimized + n = n + 1 + return format("format('%%q',tostring(a%s))",n) +end + +local format_i = function(f) + n = n + 1 + if f and f ~= "" then + return format("format('%%%si',a%s)",f,n) + else + return format("format('%%i',a%s)",n) -- why not just tostring() + end +end + +local format_d = format_i + +local format_I = function(f) + n = n + 1 + return format("format('%%s%%%si',signed(a%s))",f,n) +end + +local format_f = function(f) + n = n + 1 + return format("format('%%%sf',a%s)",f,n) +end + +-- The next one formats an integer as integer and very small values as zero. This is needed +-- for pdf backend code. +-- +-- 1.23 % 1 : 0.23 +-- - 1.23 % 1 : 0.77 +-- +-- We could probably use just %s with integers but who knows what Lua 5.3 will do? So let's +-- for the moment use %i. + +local format_F = function(f) -- beware, no cast to number + n = n + 1 + if not f or f == "" then + return format("(((a%s > -0.0000000005 and a%s < 0.0000000005) and '0') or format((a%s %% 1 == 0) and '%%i' or '%%.9f',a%s))",n,n,n,n) + else + return format("format((a%s %% 1 == 0) and '%%i' or '%%%sf',a%s)",n,f,n) + end +end + +local format_g = function(f) + n = n + 1 + return format("format('%%%sg',a%s)",f,n) +end + +local format_G = function(f) + n = n + 1 + return format("format('%%%sG',a%s)",f,n) +end + +local format_e = function(f) + n = n + 1 + return format("format('%%%se',a%s)",f,n) +end + +local format_E = function(f) + n = n + 1 + return format("format('%%%sE',a%s)",f,n) +end + +local format_j = function(f) + n = n + 1 + return format("sparseexponent('%%%se',a%s)",f,n) +end + +local format_J = function(f) + n = n + 1 + return format("sparseexponent('%%%sE',a%s)",f,n) +end + +local format_x = function(f) + n = n + 1 + return format("format('%%%sx',a%s)",f,n) +end + +local format_X = function(f) + n = n + 1 + return format("format('%%%sX',a%s)",f,n) +end + +local format_o = function(f) + n = n + 1 + return format("format('%%%so',a%s)",f,n) +end + +local format_c = function() + n = n + 1 + return format("utfchar(a%s)",n) +end + +local format_C = function() + n = n + 1 + return format("tracedchar(a%s)",n) +end + +local format_r = function(f) + n = n + 1 + return format("format('%%%s.0f',a%s)",f,n) +end + +local format_h = function(f) + n = n + 1 + if f == "-" then + f = sub(f,2) + return format("format('%%%sx',type(a%s) == 'number' and a%s or utfbyte(a%s))",f == "" and "05" or f,n,n,n) + else + return format("format('0x%%%sx',type(a%s) == 'number' and a%s or utfbyte(a%s))",f == "" and "05" or f,n,n,n) + end +end + +local format_H = function(f) + n = n + 1 + if f == "-" then + f = sub(f,2) + return format("format('%%%sX',type(a%s) == 'number' and a%s or utfbyte(a%s))",f == "" and "05" or f,n,n,n) + else + return format("format('0x%%%sX',type(a%s) == 'number' and a%s or utfbyte(a%s))",f == "" and "05" or f,n,n,n) + end +end + +local format_u = function(f) + n = n + 1 + if f == "-" then + f = sub(f,2) + return format("format('%%%sx',type(a%s) == 'number' and a%s or utfbyte(a%s))",f == "" and "05" or f,n,n,n) + else + return format("format('u+%%%sx',type(a%s) == 'number' and a%s or utfbyte(a%s))",f == "" and "05" or f,n,n,n) + end +end + +local format_U = function(f) + n = n + 1 + if f == "-" then + f = sub(f,2) + return format("format('%%%sX',type(a%s) == 'number' and a%s or utfbyte(a%s))",f == "" and "05" or f,n,n,n) + else + return format("format('U+%%%sX',type(a%s) == 'number' and a%s or utfbyte(a%s))",f == "" and "05" or f,n,n,n) + end +end + +local format_p = function() + n = n + 1 + return format("points(a%s)",n) +end + +local format_b = function() + n = n + 1 + return format("basepoints(a%s)",n) +end + +local format_t = function(f) + n = n + 1 + if f and f ~= "" then + return format("concat(a%s,%q)",n,f) + else + return format("concat(a%s)",n) + end +end + +local format_T = function(f) + n = n + 1 + if f and f ~= "" then + return format("sequenced(a%s,%q)",n,f) + else + return format("sequenced(a%s)",n) + end +end + +local format_l = function() + n = n + 1 + return format("(a%s and 'true' or 'false')",n) +end + +local format_L = function() + n = n + 1 + return format("(a%s and 'TRUE' or 'FALSE')",n) +end + +local format_N = function() -- strips leading zeros + n = n + 1 + return format("tostring(tonumber(a%s) or a%s)",n,n) +end + +local format_a = function(f) + n = n + 1 + if f and f ~= "" then + return format("autosingle(a%s,%q)",n,f) + else + return format("autosingle(a%s)",n) + end +end + +local format_A = function(f) + n = n + 1 + if f and f ~= "" then + return format("autodouble(a%s,%q)",n,f) + else + return format("autodouble(a%s)",n) + end +end + +local format_w = function(f) -- handy when doing depth related indent + n = n + 1 + f = tonumber(f) + if f then -- not that useful + return format("nspaces[%s+a%s]",f,n) -- no real need for tonumber + else + return format("nspaces[a%s]",n) -- no real need for tonumber + end +end + +local format_W = function(f) -- handy when doing depth related indent + return format("nspaces[%s]",tonumber(f) or 0) +end + +-- maybe to util-num + +local digit = patterns.digit +local period = patterns.period +local three = digit * digit * digit + +local splitter = Cs ( + (((1 - (three^1 * period))^1 + C(three)) * (Carg(1) * three)^1 + C((1-period)^1)) + * (P(1)/"" * Carg(2)) * C(2) +) + +patterns.formattednumber = splitter + +function number.formatted(n,sep1,sep2) + local s = type(s) == "string" and n or format("%0.2f",n) + if sep1 == true then + return lpegmatch(splitter,s,1,".",",") + elseif sep1 == "." then + return lpegmatch(splitter,s,1,sep1,sep2 or ",") + elseif sep1 == "," then + return lpegmatch(splitter,s,1,sep1,sep2 or ".") + else + return lpegmatch(splitter,s,1,sep1 or ",",sep2 or ".") + end +end + +-- print(number.formatted(1)) +-- print(number.formatted(12)) +-- print(number.formatted(123)) +-- print(number.formatted(1234)) +-- print(number.formatted(12345)) +-- print(number.formatted(123456)) +-- print(number.formatted(1234567)) +-- print(number.formatted(12345678)) +-- print(number.formatted(12345678,true)) +-- print(number.formatted(1234.56,"!","?")) + +local format_m = function(f) + n = n + 1 + if not f or f == "" then + f = "," + end + return format([[formattednumber(a%s,%q,".")]],n,f) +end + +local format_M = function(f) + n = n + 1 + if not f or f == "" then + f = "." + end + return format([[formattednumber(a%s,%q,",")]],n,f) +end + +-- + +local format_z = function(f) + n = n + (tonumber(f) or 1) + return "''" -- okay, not that efficient to append '' but a special case anyway +end + +-- + +local format_rest = function(s) + return format("%q",s) -- catches " and \n and such +end + +local format_extension = function(extensions,f,name) + local extension = extensions[name] or "tostring(%s)" + local f = tonumber(f) or 1 + if f == 0 then + return extension + elseif f == 1 then + n = n + 1 + local a = "a" .. n + return format(extension,a,a) -- maybe more times? + elseif f < 0 then + local a = "a" .. (n + f + 1) + return format(extension,a,a) + else + local t = { } + for i=1,f do + n = n + 1 + t[#t+1] = "a" .. n + end + return format(extension,unpack(t)) + end +end + +-- aA b cC d eE f gG hH iI jJ lL mM N o p qQ r sS tT uU wW xX z + +local builder = Cs { "start", + start = ( + ( + P("%") / "" + * ( + V("!") -- new + + V("s") + V("q") + + V("i") + V("d") + + V("f") + V("F") + V("g") + V("G") + V("e") + V("E") + + V("x") + V("X") + V("o") + -- + + V("c") + + V("C") + + V("S") -- new + + V("Q") -- new + + V("N") -- new + -- + + V("r") + + V("h") + V("H") + V("u") + V("U") + + V("p") + V("b") + + V("t") + V("T") + + V("l") + V("L") + + V("I") + + V("w") -- new + + V("W") -- new + + V("a") -- new + + V("A") -- new + + V("j") + V("J") -- stripped e E + + V("m") + V("M") -- new + + V("z") -- new + -- + -- + V("?") -- ignores probably messed up % + ) + + V("*") + ) + * (P(-1) + Carg(1)) + )^0, + -- + ["s"] = (prefix_any * P("s")) / format_s, -- %s => regular %s (string) + ["q"] = (prefix_any * P("q")) / format_q, -- %q => regular %q (quoted string) + ["i"] = (prefix_any * P("i")) / format_i, -- %i => regular %i (integer) + ["d"] = (prefix_any * P("d")) / format_d, -- %d => regular %d (integer) + ["f"] = (prefix_any * P("f")) / format_f, -- %f => regular %f (float) + ["F"] = (prefix_any * P("F")) / format_F, -- %F => regular %f (float) but 0/1 check + ["g"] = (prefix_any * P("g")) / format_g, -- %g => regular %g (float) + ["G"] = (prefix_any * P("G")) / format_G, -- %G => regular %G (float) + ["e"] = (prefix_any * P("e")) / format_e, -- %e => regular %e (float) + ["E"] = (prefix_any * P("E")) / format_E, -- %E => regular %E (float) + ["x"] = (prefix_any * P("x")) / format_x, -- %x => regular %x (hexadecimal) + ["X"] = (prefix_any * P("X")) / format_X, -- %X => regular %X (HEXADECIMAL) + ["o"] = (prefix_any * P("o")) / format_o, -- %o => regular %o (octal) + -- + ["S"] = (prefix_any * P("S")) / format_S, -- %S => %s (tostring) + ["Q"] = (prefix_any * P("Q")) / format_S, -- %Q => %q (tostring) + ["N"] = (prefix_any * P("N")) / format_N, -- %N => tonumber (strips leading zeros) + ["c"] = (prefix_any * P("c")) / format_c, -- %c => utf character (extension to regular) + ["C"] = (prefix_any * P("C")) / format_C, -- %c => U+.... utf character + -- + ["r"] = (prefix_any * P("r")) / format_r, -- %r => round + ["h"] = (prefix_any * P("h")) / format_h, -- %h => 0x0a1b2 (when - no 0x) was v + ["H"] = (prefix_any * P("H")) / format_H, -- %H => 0x0A1B2 (when - no 0x) was V + ["u"] = (prefix_any * P("u")) / format_u, -- %u => u+0a1b2 (when - no u+) + ["U"] = (prefix_any * P("U")) / format_U, -- %U => U+0A1B2 (when - no U+) + ["p"] = (prefix_any * P("p")) / format_p, -- %p => 12.345pt / maybe: P (and more units) + ["b"] = (prefix_any * P("b")) / format_b, -- %b => 12.342bp / maybe: B (and more units) + ["t"] = (prefix_tab * P("t")) / format_t, -- %t => concat + ["T"] = (prefix_tab * P("T")) / format_T, -- %t => sequenced + ["l"] = (prefix_any * P("l")) / format_l, -- %l => boolean + ["L"] = (prefix_any * P("L")) / format_L, -- %L => BOOLEAN + ["I"] = (prefix_any * P("I")) / format_I, -- %I => signed integer + -- + ["w"] = (prefix_any * P("w")) / format_w, -- %w => n spaces (optional prefix is added) + ["W"] = (prefix_any * P("W")) / format_W, -- %W => mandate prefix, no specifier + -- + ["j"] = (prefix_any * P("j")) / format_j, -- %j => %e (float) stripped exponent (irrational) + ["J"] = (prefix_any * P("J")) / format_J, -- %J => %E (float) stripped exponent (irrational) + -- + ["m"] = (prefix_tab * P("m")) / format_m, -- %m => xxx.xxx.xxx,xx (optional prefix instead of .) + ["M"] = (prefix_tab * P("M")) / format_M, -- %M => xxx,xxx,xxx.xx (optional prefix instead of ,) + -- + ["z"] = (prefix_any * P("z")) / format_z, -- %M => xxx,xxx,xxx.xx (optional prefix instead of ,) + -- + ["a"] = (prefix_any * P("a")) / format_a, -- %a => '...' (forces tostring) + ["A"] = (prefix_any * P("A")) / format_A, -- %A => "..." (forces tostring) + -- + ["*"] = Cs(((1-P("%"))^1 + P("%%")/"%%")^1) / format_rest, -- rest (including %%) + ["?"] = Cs(((1-P("%"))^1 )^1) / format_rest, -- rest (including %%) + -- + ["!"] = Carg(2) * prefix_any * P("!") * C((1-P("!"))^1) * P("!") / format_extension, +} + +-- we can be clever and only alias what is needed + +-- local direct = Cs ( +-- P("%")/"" +-- * Cc([[local format = string.format return function(str) return format("%]]) +-- * (S("+- .") + R("09"))^0 +-- * S("sqidfgGeExXo") +-- * Cc([[",str) end]]) +-- * P(-1) +-- ) + +local direct = Cs ( + P("%") + * (S("+- .") + R("09"))^0 + * S("sqidfgGeExXo") + * P(-1) / [[local format = string.format return function(str) return format("%0",str) end]] +) + +local function make(t,str) + local f + local p + local p = lpegmatch(direct,str) + if p then + -- f = loadstripped(p)() + -- print("builder 1 >",p) + f = loadstripped(p)() + else + n = 0 + -- p = lpegmatch(builder,str,1,"..",t._extensions_) -- after this we know n + p = lpegmatch(builder,str,1,t._connector_,t._extensions_) -- after this we know n + if n > 0 then + p = format(template,preamble,t._preamble_,arguments[n],p) + -- print("builder 2 >",p) + f = loadstripped(p,t._environment_)() -- t._environment is not populated (was experiment) + else + f = function() return str end + end + end + t[str] = f + return f +end + +-- -- collect periodically +-- +-- local threshold = 1000 -- max nof cached formats +-- +-- local function make(t,str) +-- local f = rawget(t,str) +-- if f then +-- return f +-- end +-- local parent = t._t_ +-- if parent._n_ > threshold then +-- local m = { _t_ = parent } +-- getmetatable(parent).__index = m +-- setmetatable(m, { __index = make }) +-- else +-- parent._n_ = parent._n_ + 1 +-- end +-- local f +-- local p = lpegmatch(direct,str) +-- if p then +-- f = loadstripped(p)() +-- else +-- n = 0 +-- p = lpegmatch(builder,str,1,"..",parent._extensions_) -- after this we know n +-- if n > 0 then +-- p = format(template,preamble,parent._preamble_,arguments[n],p) +-- -- print("builder>",p) +-- f = loadstripped(p)() +-- else +-- f = function() return str end +-- end +-- end +-- t[str] = f +-- return f +-- end + +local function use(t,fmt,...) + return t[fmt](...) +end + +strings.formatters = { } + +-- we cannot make these tables weak, unless we start using an indirect +-- table (metatable) in which case we could better keep a count and +-- clear that table when a threshold is reached + +-- _connector_ is an experiment + +if _LUAVERSION < 5.2 then + + function strings.formatters.new(noconcat) + local t = { _type_ = "formatter", _connector_ = noconcat and "," or "..", _extensions_ = { }, _preamble_ = preamble, _environment_ = { } } + setmetatable(t, { __index = make, __call = use }) + return t + end + +else + + function strings.formatters.new(noconcat) + local e = { } -- better make a copy as we can overload + for k, v in next, environment do + e[k] = v + end + local t = { _type_ = "formatter", _connector_ = noconcat and "," or "..", _extensions_ = { }, _preamble_ = "", _environment_ = e } + setmetatable(t, { __index = make, __call = use }) + return t + end + +end + +-- function strings.formatters.new() +-- local t = { _extensions_ = { }, _preamble_ = "", _type_ = "formatter", _n_ = 0 } +-- local m = { _t_ = t } +-- setmetatable(t, { __index = m, __call = use }) +-- setmetatable(m, { __index = make }) +-- return t +-- end + +local formatters = strings.formatters.new() -- the default instance + +string.formatters = formatters -- in the main string namespace +string.formatter = function(str,...) return formatters[str](...) end -- sometimes nicer name + +local function add(t,name,template,preamble) + if type(t) == "table" and t._type_ == "formatter" then + t._extensions_[name] = template or "%s" + if type(preamble) == "string" then + t._preamble_ = preamble .. "\n" .. t._preamble_ -- so no overload ! + elseif type(preamble) == "table" then + for k, v in next, preamble do + t._environment_[k] = v + end + end + end +end + +strings.formatters.add = add + +-- registered in the default instance (should we fall back on this one?) + +patterns.xmlescape = Cs((P("<")/"<" + P(">")/">" + P("&")/"&" + P('"')/""" + P(1))^0) +patterns.texescape = Cs((C(S("#$%\\{}"))/"\\%1" + P(1))^0) +patterns.luaescape = Cs(((1-S('"\n'))^1 + P('"')/'\\"' + P('\n')/'\\n"')^0) -- maybe also \0 +patterns.luaquoted = Cs(Cc('"') * ((1-S('"\n'))^1 + P('"')/'\\"' + P('\n')/'\\n"')^0 * Cc('"')) + +-- escaping by lpeg is faster for strings without quotes, slower on a string with quotes, but +-- faster again when other q-escapables are found (the ones we don't need to escape) + +-- add(formatters,"xml", [[lpegmatch(xmlescape,%s)]],[[local xmlescape = lpeg.patterns.xmlescape]]) +-- add(formatters,"tex", [[lpegmatch(texescape,%s)]],[[local texescape = lpeg.patterns.texescape]]) +-- add(formatters,"lua", [[lpegmatch(luaescape,%s)]],[[local luaescape = lpeg.patterns.luaescape]]) + +if _LUAVERSION < 5.2 then + + add(formatters,"xml",[[lpegmatch(xmlescape,%s)]],"local xmlescape = lpeg.patterns.xmlescape") + add(formatters,"tex",[[lpegmatch(texescape,%s)]],"local texescape = lpeg.patterns.texescape") + add(formatters,"lua",[[lpegmatch(luaescape,%s)]],"local luaescape = lpeg.patterns.luaescape") + +else + + add(formatters,"xml",[[lpegmatch(xmlescape,%s)]],{ xmlescape = lpeg.patterns.xmlescape }) + add(formatters,"tex",[[lpegmatch(texescape,%s)]],{ texescape = lpeg.patterns.texescape }) + add(formatters,"lua",[[lpegmatch(luaescape,%s)]],{ luaescape = lpeg.patterns.luaescape }) + +end + +-- -- yes or no: +-- +-- local function make(t,str) +-- local f +-- local p = lpegmatch(direct,str) +-- if p then +-- f = loadstripped(p)() +-- else +-- n = 0 +-- p = lpegmatch(builder,str,1,",") -- after this we know n +-- if n > 0 then +-- p = format(template,template_shortcuts,arguments[n],p) +-- f = loadstripped(p)() +-- else +-- f = function() return str end +-- end +-- end +-- t[str] = f +-- return f +-- end +-- +-- local formatteds = string.formatteds or { } +-- string.formatteds = formatteds +-- +-- setmetatable(formatteds, { __index = make, __call = use }) + +-- This is a somewhat silly one used in commandline reconstruction but the older +-- method, using a combination of fine, gsub, quoted and unquoted was not that +-- reliable. +-- +-- '"foo"bar \"and " whatever"' => "foo\"bar \"and \" whatever" +-- 'foo"bar \"and " whatever' => "foo\"bar \"and \" whatever" + +local dquote = patterns.dquote -- P('"') +local equote = patterns.escaped + dquote / '\\"' + 1 +local space = patterns.space +local cquote = Cc('"') + +local pattern = + Cs(dquote * (equote - P(-2))^0 * dquote) -- we keep the outer but escape unescaped ones + + Cs(cquote * (equote - space)^0 * space * equote^0 * cquote) -- we escape unescaped ones + +function string.optionalquoted(str) + return lpegmatch(pattern,str) or str +end |