summaryrefslogtreecommitdiff
path: root/tex/context/base/util-str.lua
diff options
context:
space:
mode:
Diffstat (limited to 'tex/context/base/util-str.lua')
-rw-r--r--tex/context/base/util-str.lua354
1 files changed, 296 insertions, 58 deletions
diff --git a/tex/context/base/util-str.lua b/tex/context/base/util-str.lua
index af8b1651e..de4a87e9f 100644
--- a/tex/context/base/util-str.lua
+++ b/tex/context/base/util-str.lua
@@ -20,21 +20,44 @@ local utfchar, utfbyte = utf.char, utf.byte
----- loadstripped = utilities.lua.loadstripped
----- setmetatableindex = table.setmetatableindex
-local loadstripped = _LUAVERSION < 5.2 and load or function(str)
- return load(dump(load(str),true)) -- it only makes sense in luajit and luatex where we have a stipped load
+local loadstripped = nil
+
+if _LUAVERSION < 5.2 then
+
+ loadstripped = function(str,shortcuts)
+ return load(str)
+ end
+
+else
+
+ loadstripped = function(str,shortcuts)
+ if shortcuts then
+ return load(dump(load(str),true),nil,nil,shortcuts)
+ else
+ return load(dump(load(str),true))
+ end
+ end
+
end
-- todo: make a special namespace for the formatter
if not number then number = { } end -- temp hack for luatex-fonts
-local stripper = patterns.stripzeros
+local stripper = patterns.stripzeros
+local newline = patterns.newline
+local endofstring = patterns.endofstring
+local whitespace = patterns.whitespace
+local spacer = patterns.spacer
+local spaceortab = patterns.spaceortab
local function points(n)
+ n = tonumber(n)
return (not n or n == 0) and "0pt" or lpegmatch(stripper,format("%.5fpt",n/65536))
end
local function basepoints(n)
+ n = tonumber(n)
return (not n or n == 0) and "0bp" or lpegmatch(stripper,format("%.5fbp", n*(7200/7227)/65536))
end
@@ -44,12 +67,12 @@ number.basepoints = basepoints
-- str = " \n \ntest \n test\ntest "
-- print("["..string.gsub(string.collapsecrlf(str),"\n","+").."]")
-local rubish = patterns.spaceortab^0 * patterns.newline
-local anyrubish = patterns.spaceortab + patterns.newline
+local rubish = spaceortab^0 * newline
+local anyrubish = spaceortab + newline
local anything = patterns.anything
-local stripped = (patterns.spaceortab^1 / "") * patterns.newline
+local stripped = (spaceortab^1 / "") * newline
local leading = rubish^0 / ""
-local trailing = (anyrubish^1 * patterns.endofstring) / ""
+local trailing = (anyrubish^1 * endofstring) / ""
local redundant = rubish^3 / "\n"
local pattern = Cs(leading * (trailing + redundant + stripped + anything)^0)
@@ -111,7 +134,7 @@ local pattern =
return ""
end
end
- + patterns.newline * Cp() / function(position)
+ + newline * Cp() / function(position)
extra, start = 0, position
end
+ patterns.anything
@@ -136,17 +159,105 @@ end
-- print(strings.tabtospace(t[k]))
-- end
-function strings.striplong(str) -- strips all leading spaces
- str = gsub(str,"^%s*","")
- str = gsub(str,"[\n\r]+ *","\n")
- return str
+-- todo: lpeg
+
+-- function strings.striplong(str) -- strips all leading spaces
+-- str = gsub(str,"^%s*","")
+-- str = gsub(str,"[\n\r]+ *","\n")
+-- return str
+-- end
+
+local space = spacer^0
+local nospace = space/""
+local endofline = nospace * newline
+
+local stripend = (whitespace^1 * endofstring)/""
+
+local normalline = (nospace * ((1-space*(newline+endofstring))^1) * nospace)
+
+local stripempty = endofline^1/""
+local normalempty = endofline^1
+local singleempty = endofline * (endofline^0/"")
+local doubleempty = endofline * endofline^-1 * (endofline^0/"")
+
+local stripstart = stripempty^0
+
+local p_prune_normal = Cs ( stripstart * ( stripend + normalline + normalempty )^0 )
+local p_prune_collapse = Cs ( stripstart * ( stripend + normalline + doubleempty )^0 )
+local p_prune_noempty = Cs ( stripstart * ( stripend + normalline + singleempty )^0 )
+local p_retain_normal = Cs ( ( normalline + normalempty )^0 )
+local p_retain_collapse = Cs ( ( normalline + doubleempty )^0 )
+local p_retain_noempty = Cs ( ( normalline + singleempty )^0 )
+
+-- function striplines(str,prune,collapse,noempty)
+-- if prune then
+-- if noempty then
+-- return lpegmatch(p_prune_noempty,str) or str
+-- elseif collapse then
+-- return lpegmatch(p_prune_collapse,str) or str
+-- else
+-- return lpegmatch(p_prune_normal,str) or str
+-- end
+-- else
+-- if noempty then
+-- return lpegmatch(p_retain_noempty,str) or str
+-- elseif collapse then
+-- return lpegmatch(p_retain_collapse,str) or str
+-- else
+-- return lpegmatch(p_retain_normal,str) or str
+-- end
+-- end
+-- end
+
+local striplinepatterns = {
+ ["prune"] = p_prune_normal,
+ ["prune and collapse"] = p_prune_collapse, -- default
+ ["prune and no empty"] = p_prune_noempty,
+ ["retain"] = p_retain_normal,
+ ["retain and collapse"] = p_retain_collapse,
+ ["retain and no empty"] = p_retain_noempty,
+ ["collapse"] = patterns.collapser, -- how about: stripper fullstripper
+}
+
+setmetatable(striplinepatterns,{ __index = function(t,k) return p_prune_collapse end })
+
+strings.striplinepatterns = striplinepatterns
+
+function strings.striplines(str,how)
+ return str and lpegmatch(striplinepatterns[how],str) or str
end
--- local template = string.striplong([[
+-- also see: string.collapsespaces
+
+strings.striplong = strings.striplines -- for old times sake
+
+-- local str = table.concat( {
+-- " ",
+-- " aap",
+-- " noot mies",
+-- " ",
+-- " ",
+-- " zus wim jet",
+-- "zus wim jet",
+-- " zus wim jet",
+-- " ",
+-- }, "\n")
+
+-- local str = table.concat( {
+-- " aaaa",
+-- " bb",
+-- " cccccc",
+-- }, "\n")
+
+-- for k, v in table.sortedhash(utilities.strings.striplinepatterns) do
+-- logs.report("stripper","method: %s, result: [[%s]]",k,utilities.strings.striplines(str,k))
+-- end
+
+-- inspect(strings.striplong([[
-- aaaa
-- bb
-- cccccc
--- ]])
+-- ]]))
function strings.nice(str)
str = gsub(str,"[:%-+_]+"," ") -- maybe more
@@ -178,6 +289,7 @@ end
-- octal %...o number
-- string %...s string number
-- float %...f number
+-- checked float %...F number
-- exponential %...e number
-- exponential %...E number
-- autofloat %...g number
@@ -249,10 +361,10 @@ strings.tracers = tracedchars
function string.tracedchar(b)
-- todo: table
if type(b) == "number" then
- return tracedchars[b] or (utfchar(b) .. " (U+" .. format('%05X',b) .. ")")
+ return tracedchars[b] or (utfchar(b) .. " (U+" .. format("%05X",b) .. ")")
else
local c = utfbyte(b)
- return tracedchars[c] or (b .. " (U+" .. format('%05X',c) .. ")")
+ return tracedchars[c] or (b .. " (U+" .. (c and format("%05X",c) or "?????") .. ")")
end
end
@@ -291,33 +403,67 @@ function number.sparseexponent(f,n)
return tostring(n)
end
-local preamble = [[
-local type = type
-local tostring = tostring
-local tonumber = tonumber
-local format = string.format
-local concat = table.concat
-local signed = number.signed
-local points = number.points
-local basepoints = number.basepoints
-local utfchar = utf.char
-local utfbyte = utf.byte
-local lpegmatch = lpeg.match
-local nspaces = string.nspaces
-local tracedchar = string.tracedchar
-local autosingle = string.autosingle
-local autodouble = string.autodouble
-local sequenced = table.sequenced
-local formattednumber = number.formatted
-local sparseexponent = number.sparseexponent
-]]
-
local template = [[
%s
%s
return function(%s) return %s end
]]
+local preamble, environment = "", { }
+
+if _LUAVERSION < 5.2 then
+
+ preamble = [[
+local lpeg=lpeg
+local type=type
+local tostring=tostring
+local tonumber=tonumber
+local format=string.format
+local concat=table.concat
+local signed=number.signed
+local points=number.points
+local basepoints= number.basepoints
+local utfchar=utf.char
+local utfbyte=utf.byte
+local lpegmatch=lpeg.match
+local nspaces=string.nspaces
+local tracedchar=string.tracedchar
+local autosingle=string.autosingle
+local autodouble=string.autodouble
+local sequenced=table.sequenced
+local formattednumber=number.formatted
+local sparseexponent=number.sparseexponent
+ ]]
+
+else
+
+ environment = {
+ global = global or _G,
+ lpeg = lpeg,
+ type = type,
+ tostring = tostring,
+ tonumber = tonumber,
+ format = string.format,
+ concat = table.concat,
+ signed = number.signed,
+ points = number.points,
+ basepoints = number.basepoints,
+ utfchar = utf.char,
+ utfbyte = utf.byte,
+ lpegmatch = lpeg.match,
+ nspaces = string.nspaces,
+ tracedchar = string.tracedchar,
+ autosingle = string.autosingle,
+ autodouble = string.autodouble,
+ sequenced = table.sequenced,
+ formattednumber = number.formatted,
+ sparseexponent = number.sparseexponent,
+ }
+
+end
+
+-- -- --
+
local arguments = { "a1" } -- faster than previously used (select(n,...))
setmetatable(arguments, { __index =
@@ -368,7 +514,7 @@ local format_i = function(f)
if f and f ~= "" then
return format("format('%%%si',a%s)",f,n)
else
- return format("format('%%i',a%s)",n)
+ return format("format('%%i',a%s)",n) -- why not just tostring()
end
end
@@ -384,6 +530,24 @@ local format_f = function(f)
return format("format('%%%sf',a%s)",f,n)
end
+-- The next one formats an integer as integer and very small values as zero. This is needed
+-- for pdf backend code.
+--
+-- 1.23 % 1 : 0.23
+-- - 1.23 % 1 : 0.77
+--
+-- We could probably use just %s with integers but who knows what Lua 5.3 will do? So let's
+-- for the moment use %i.
+
+local format_F = function(f) -- beware, no cast to number
+ n = n + 1
+ if not f or f == "" then
+ return format("(((a%s > -0.0000000005 and a%s < 0.0000000005) and '0') or format((a%s %% 1 == 0) and '%%i' or '%%.9f',a%s))",n,n,n,n)
+ else
+ return format("format((a%s %% 1 == 0) and '%%i' or '%%%sf',a%s)",n,f,n)
+ end
+end
+
local format_g = function(f)
n = n + 1
return format("format('%%%sg',a%s)",f,n)
@@ -657,7 +821,7 @@ local builder = Cs { "start",
V("!") -- new
+ V("s") + V("q")
+ V("i") + V("d")
- + V("f") + V("g") + V("G") + V("e") + V("E")
+ + V("f") + V("F") + V("g") + V("G") + V("e") + V("E")
+ V("x") + V("X") + V("o")
--
+ V("c")
@@ -680,7 +844,7 @@ local builder = Cs { "start",
+ V("m") + V("M") -- new
+ V("z") -- new
--
- + V("*") -- ignores probably messed up %
+ -- + V("?") -- ignores probably messed up %
)
+ V("*")
)
@@ -692,6 +856,7 @@ local builder = Cs { "start",
["i"] = (prefix_any * P("i")) / format_i, -- %i => regular %i (integer)
["d"] = (prefix_any * P("d")) / format_d, -- %d => regular %d (integer)
["f"] = (prefix_any * P("f")) / format_f, -- %f => regular %f (float)
+ ["F"] = (prefix_any * P("F")) / format_F, -- %F => regular %f (float) but 0/1 check
["g"] = (prefix_any * P("g")) / format_g, -- %g => regular %g (float)
["G"] = (prefix_any * P("G")) / format_G, -- %G => regular %G (float)
["e"] = (prefix_any * P("e")) / format_e, -- %e => regular %e (float)
@@ -734,34 +899,45 @@ local builder = Cs { "start",
["A"] = (prefix_any * P("A")) / format_A, -- %A => "..." (forces tostring)
--
["*"] = Cs(((1-P("%"))^1 + P("%%")/"%%")^1) / format_rest, -- rest (including %%)
+ ["?"] = Cs(((1-P("%"))^1 )^1) / format_rest, -- rest (including %%)
--
["!"] = Carg(2) * prefix_any * P("!") * C((1-P("!"))^1) * P("!") / format_extension,
}
-- we can be clever and only alias what is needed
+-- local direct = Cs (
+-- P("%")/""
+-- * Cc([[local format = string.format return function(str) return format("%]])
+-- * (S("+- .") + R("09"))^0
+-- * S("sqidfgGeExXo")
+-- * Cc([[",str) end]])
+-- * P(-1)
+-- )
+
local direct = Cs (
- P("%")/""
- * Cc([[local format = string.format return function(str) return format("%]])
- * (S("+- .") + R("09"))^0
- * S("sqidfgGeExXo")
- * Cc([[",str) end]])
- * P(-1)
- )
+ P("%")
+ * (S("+- .") + R("09"))^0
+ * S("sqidfgGeExXo")
+ * P(-1) / [[local format = string.format return function(str) return format("%0",str) end]]
+)
local function make(t,str)
local f
local p
local p = lpegmatch(direct,str)
if p then
+ -- f = loadstripped(p)()
+ -- print("builder 1 >",p)
f = loadstripped(p)()
else
n = 0
- p = lpegmatch(builder,str,1,"..",t._extensions_) -- after this we know n
+ -- p = lpegmatch(builder,str,1,"..",t._extensions_) -- after this we know n
+ p = lpegmatch(builder,str,1,t._connector_,t._extensions_) -- after this we know n
if n > 0 then
p = format(template,preamble,t._preamble_,arguments[n],p)
--- print("builder>",p)
- f = loadstripped(p)()
+ -- print("builder 2 >",p)
+ f = loadstripped(p,t._environment_)() -- t._environment is not populated (was experiment)
else
f = function() return str end
end
@@ -816,10 +992,28 @@ strings.formatters = { }
-- table (metatable) in which case we could better keep a count and
-- clear that table when a threshold is reached
-function strings.formatters.new()
- local t = { _extensions_ = { }, _preamble_ = "", _type_ = "formatter" }
- setmetatable(t, { __index = make, __call = use })
- return t
+-- _connector_ is an experiment
+
+if _LUAVERSION < 5.2 then
+
+ function strings.formatters.new(noconcat)
+ local t = { _type_ = "formatter", _connector_ = noconcat and "," or "..", _extensions_ = { }, _preamble_ = preamble, _environment_ = { } }
+ setmetatable(t, { __index = make, __call = use })
+ return t
+ end
+
+else
+
+ function strings.formatters.new(noconcat)
+ local e = { } -- better make a copy as we can overload
+ for k, v in next, environment do
+ e[k] = v
+ end
+ local t = { _type_ = "formatter", _connector_ = noconcat and "," or "..", _extensions_ = { }, _preamble_ = "", _environment_ = e }
+ setmetatable(t, { __index = make, __call = use })
+ return t
+ end
+
end
-- function strings.formatters.new()
@@ -838,8 +1032,12 @@ string.formatter = function(str,...) return formatters[str](...) end -- someti
local function add(t,name,template,preamble)
if type(t) == "table" and t._type_ == "formatter" then
t._extensions_[name] = template or "%s"
- if preamble then
+ if type(preamble) == "string" then
t._preamble_ = preamble .. "\n" .. t._preamble_ -- so no overload !
+ elseif type(preamble) == "table" then
+ for k, v in next, preamble do
+ t._environment_[k] = v
+ end
end
end
end
@@ -856,9 +1054,23 @@ patterns.luaquoted = Cs(Cc('"') * ((1-S('"\n'))^1 + P('"')/'\\"' + P('\n')/'\\n"
-- escaping by lpeg is faster for strings without quotes, slower on a string with quotes, but
-- faster again when other q-escapables are found (the ones we don't need to escape)
-add(formatters,"xml", [[lpegmatch(xmlescape,%s)]],[[local xmlescape = lpeg.patterns.xmlescape]])
-add(formatters,"tex", [[lpegmatch(texescape,%s)]],[[local texescape = lpeg.patterns.texescape]])
-add(formatters,"lua", [[lpegmatch(luaescape,%s)]],[[local luaescape = lpeg.patterns.luaescape]])
+-- add(formatters,"xml", [[lpegmatch(xmlescape,%s)]],[[local xmlescape = lpeg.patterns.xmlescape]])
+-- add(formatters,"tex", [[lpegmatch(texescape,%s)]],[[local texescape = lpeg.patterns.texescape]])
+-- add(formatters,"lua", [[lpegmatch(luaescape,%s)]],[[local luaescape = lpeg.patterns.luaescape]])
+
+if _LUAVERSION < 5.2 then
+
+ add(formatters,"xml",[[lpegmatch(xmlescape,%s)]],"local xmlescape = lpeg.patterns.xmlescape")
+ add(formatters,"tex",[[lpegmatch(texescape,%s)]],"local texescape = lpeg.patterns.texescape")
+ add(formatters,"lua",[[lpegmatch(luaescape,%s)]],"local luaescape = lpeg.patterns.luaescape")
+
+else
+
+ add(formatters,"xml",[[lpegmatch(xmlescape,%s)]],{ xmlescape = lpeg.patterns.xmlescape })
+ add(formatters,"tex",[[lpegmatch(texescape,%s)]],{ texescape = lpeg.patterns.texescape })
+ add(formatters,"lua",[[lpegmatch(luaescape,%s)]],{ luaescape = lpeg.patterns.luaescape })
+
+end
-- -- yes or no:
--
@@ -885,3 +1097,29 @@ add(formatters,"lua", [[lpegmatch(luaescape,%s)]],[[local luaescape = lpeg.patte
-- string.formatteds = formatteds
--
-- setmetatable(formatteds, { __index = make, __call = use })
+
+-- This is a somewhat silly one used in commandline reconstruction but the older
+-- method, using a combination of fine, gsub, quoted and unquoted was not that
+-- reliable.
+--
+-- '"foo"bar \"and " whatever"' => "foo\"bar \"and \" whatever"
+-- 'foo"bar \"and " whatever' => "foo\"bar \"and \" whatever"
+
+local dquote = patterns.dquote -- P('"')
+local equote = patterns.escaped + dquote / '\\"' + 1
+local space = patterns.space
+local cquote = Cc('"')
+
+local pattern =
+ Cs(dquote * (equote - P(-2))^0 * dquote) -- we keep the outer but escape unescaped ones
+ + Cs(cquote * (equote - space)^0 * space * equote^0 * cquote) -- we escape unescaped ones
+
+function string.optionalquoted(str)
+ return lpegmatch(pattern,str) or str
+end
+
+local pattern = Cs((newline / os.newline + 1)^0)
+
+function string.replacenewlines(str)
+ return lpegmatch(pattern,str)
+end