summaryrefslogtreecommitdiff
path: root/tex/context/base/mkiv/util-str.lua
diff options
context:
space:
mode:
Diffstat (limited to 'tex/context/base/mkiv/util-str.lua')
-rw-r--r--tex/context/base/mkiv/util-str.lua190
1 files changed, 146 insertions, 44 deletions
diff --git a/tex/context/base/mkiv/util-str.lua b/tex/context/base/mkiv/util-str.lua
index cebbc6be2..9da0c6a2f 100644
--- a/tex/context/base/mkiv/util-str.lua
+++ b/tex/context/base/mkiv/util-str.lua
@@ -12,17 +12,18 @@ local strings = utilities.strings
local format, gsub, rep, sub, find = string.format, string.gsub, string.rep, string.sub, string.find
local load, dump = load, string.dump
-local tonumber, type, tostring = tonumber, type, tostring
+local tonumber, type, tostring, next = tonumber, type, tostring, next
local unpack, concat = table.unpack, table.concat
local P, V, C, S, R, Ct, Cs, Cp, Carg, Cc = lpeg.P, lpeg.V, lpeg.C, lpeg.S, lpeg.R, lpeg.Ct, lpeg.Cs, lpeg.Cp, lpeg.Carg, lpeg.Cc
local patterns, lpegmatch = lpeg.patterns, lpeg.match
-local utfchar, utfbyte = utf.char, utf.byte
+local utfchar, utfbyte, utflen = utf.char, utf.byte, utf.len
+
----- loadstripped = utilities.lua.loadstripped
----- setmetatableindex = table.setmetatableindex
local loadstripped = nil
-if _LUAVERSION < 5.2 then
+if LUAVERSION < 5.2 then
loadstripped = function(str,shortcuts)
return load(str)
@@ -145,6 +146,18 @@ function strings.tabtospace(str,tab)
return lpegmatch(pattern,str,1,tab or 7)
end
+function string.utfpadding(s,n)
+ if not n or n == 0 then
+ return ""
+ end
+ local l = utflen(s)
+ if n > 0 then
+ return nspaces[n-l]
+ else
+ return nspaces[-n-l]
+ end
+end
+
-- local t = {
-- "1234567123456712345671234567",
-- "\tb\tc",
@@ -281,41 +294,48 @@ end
--
-- More info can be found in cld-mkiv.pdf so here I stick to a simple list.
--
--- integer %...i number
--- integer %...d number
--- unsigned %...u number
--- character %...c number
--- hexadecimal %...x number
--- HEXADECIMAL %...X number
--- octal %...o number
--- string %...s string number
--- float %...f number
--- checked float %...F number
--- exponential %...e number
--- exponential %...E number
--- autofloat %...g number
--- autofloat %...G number
--- utf character %...c number
--- force tostring %...S any
--- force tostring %Q any
--- force tonumber %N number (strip leading zeros)
--- signed number %I number
--- rounded number %r number
--- 0xhexadecimal %...h character number
--- 0xHEXADECIMAL %...H character number
--- U+hexadecimal %...u character number
--- U+HEXADECIMAL %...U character number
--- points %p number (scaled points)
--- basepoints %b number (scaled points)
--- table concat %...t table
--- table concat %{.}t table
--- serialize %...T sequenced (no nested tables)
--- serialize %{.}T sequenced (no nested tables)
--- boolean (logic) %l boolean
--- BOOLEAN %L boolean
--- whitespace %...w
--- automatic %...a 'whatever' (string, table, ...)
--- automatic %...A "whatever" (string, table, ...)
+-- integer %...i number
+-- integer %...d number
+-- unsigned %...u number -- no tused
+-- character %...c number
+-- hexadecimal %...x number
+-- HEXADECIMAL %...X number
+-- octal %...o number
+-- string %...s string number
+-- float %...f number
+-- checked float %...F number
+-- exponential %...e number
+-- exponential %...E number
+-- stripped e %...j number
+-- stripped E %...J number
+-- autofloat %...g number
+-- autofloat %...G number
+-- utf character %...c number
+-- force tostring %...S any
+-- force tostring %Q any
+-- force tonumber %N number (strip leading zeros)
+-- signed number %I number
+-- rounded number %r number
+-- 0xhexadecimal %...h character number
+-- 0xHEXADECIMAL %...H character number
+-- U+hexadecimal %...u character number
+-- U+HEXADECIMAL %...U character number
+-- points %p number (scaled points)
+-- basepoints %b number (scaled points)
+-- table concat %...t table
+-- table concat %{.}t table
+-- serialize %...T sequenced (no nested tables)
+-- serialize %{.}T sequenced (no nested tables)
+-- boolean (logic) %l boolean
+-- BOOLEAN %L boolean
+-- whitespace %...w number
+-- whitespace %...W (fixed)
+-- automatic %...a 'whatever' (string, table, ...)
+-- automatic %...A "whatever" (string, table, ...)
+-- zap %...z skip
+-- comma/period real %...m
+-- period/comma real %...M
+-- formatted float %...k n.m
local n = 0
@@ -423,6 +443,27 @@ end
-- print(number.formatted(12345678,true))
-- print(number.formatted(1234.56,"!","?"))
+local p = Cs(
+ P("-")^0
+ * (P("0")^1/"")^0
+ * (1-P("."))^0
+ * (P(".") * P("0")^1 * P(-1)/"" + P(".")^0)
+ * P(1-P("0")^1*P(-1))^0
+ )
+
+function number.compactfloat(n,fmt)
+ if n == 0 then
+ return "0"
+ elseif n == 1 then
+ return "1"
+ end
+ n = lpegmatch(p,format(fmt or "%0.3f",n))
+ if n == "." or n == "" or n == "-" then
+ return "0"
+ end
+ return n
+end
+
local zero = P("0")^1 / ""
local plus = P("+") / ""
local minus = P("-")
@@ -483,7 +524,7 @@ return function(%s) return %s end
local preamble, environment = "", { }
-if _LUAVERSION < 5.2 then
+if LUAVERSION < 5.2 then
preamble = [[
local lpeg=lpeg
@@ -499,6 +540,7 @@ local utfchar=utf.char
local utfbyte=utf.byte
local lpegmatch=lpeg.match
local nspaces=string.nspaces
+local utfpadding=string.utfpadding
local tracedchar=string.tracedchar
local autosingle=string.autosingle
local autodouble=string.autodouble
@@ -525,6 +567,7 @@ else
utfbyte = utf.byte,
lpegmatch = lpeg.match,
nspaces = string.nspaces,
+ utfpadding = string.utfpadding,
tracedchar = string.tracedchar,
autosingle = string.autosingle,
autodouble = string.autodouble,
@@ -576,9 +619,36 @@ local format_S = function(f) -- can be optimized
end
end
+local format_right = function(f)
+ n = n + 1
+ f = tonumber(f)
+ if not f or f == 0 then
+ return format("(a%s or '')",n)
+ elseif f > 0 then
+ return format("utfpadding(a%s,%i)..a%s",n,f,n)
+ else
+ return format("a%s..utfpadding(a%s,%i)",n,n,f)
+ end
+end
+
+local format_left = function(f)
+ n = n + 1
+ f = tonumber(f)
+ if not f or f == 0 then
+ return format("(a%s or '')",n)
+ end
+ if f < 0 then
+ return format("utfpadding(a%s,%i)..a%s",n,-f,n)
+ else
+ return format("a%s..utfpadding(a%s,%i)",n,n,-f)
+ end
+end
+
local format_q = function()
n = n + 1
- return format("(a%s and format('%%q',a%s) or '')",n,n) -- goodie: nil check (maybe separate lpeg, not faster)
+ -- lua 5.3 has a different q than lua 5.2 (which does a tostring on numbers)
+ -- return format("(a%s ~= nil and format('%%q',a%s) or '')",n,n)
+ return format("(a%s ~= nil and format('%%q',tostring(a%s)) or '')",n,n)
end
local format_Q = function() -- can be optimized
@@ -907,6 +977,9 @@ local builder = Cs { "start",
+ V("m") + V("M") -- new (formatted number)
+ V("z") -- new
--
+ + V(">") -- left padding
+ + V("<") -- right padding
+ --
-- + V("?") -- ignored, probably messed up %
)
+ V("*")
@@ -929,7 +1002,7 @@ local builder = Cs { "start",
["o"] = (prefix_any * P("o")) / format_o, -- %o => regular %o (octal)
--
["S"] = (prefix_any * P("S")) / format_S, -- %S => %s (tostring)
- ["Q"] = (prefix_any * P("Q")) / format_S, -- %Q => %q (tostring)
+ ["Q"] = (prefix_any * P("Q")) / format_Q, -- %Q => %q (tostring)
["N"] = (prefix_any * P("N")) / format_N, -- %N => tonumber (strips leading zeros)
["k"] = (prefix_sub * P("k")) / format_k, -- %k => like f but with n.m
["c"] = (prefix_any * P("c")) / format_c, -- %c => utf character (extension to regular)
@@ -957,11 +1030,14 @@ local builder = Cs { "start",
["m"] = (prefix_tab * P("m")) / format_m, -- %m => xxx.xxx.xxx,xx (optional prefix instead of .)
["M"] = (prefix_tab * P("M")) / format_M, -- %M => xxx,xxx,xxx.xx (optional prefix instead of ,)
--
- ["z"] = (prefix_any * P("z")) / format_z, -- %M => xxx,xxx,xxx.xx (optional prefix instead of ,)
+ ["z"] = (prefix_any * P("z")) / format_z, -- %z => skip n arguments
--
["a"] = (prefix_any * P("a")) / format_a, -- %a => '...' (forces tostring)
["A"] = (prefix_any * P("A")) / format_A, -- %A => "..." (forces tostring)
--
+ ["<"] = (prefix_any * P("<")) / format_left,
+ [">"] = (prefix_any * P(">")) / format_right,
+ --
["*"] = Cs(((1-P("%"))^1 + P("%%")/"%%")^1) / format_rest, -- rest (including %%)
["?"] = Cs(((1-P("%"))^1 )^1) / format_rest, -- rest (including %%)
--
@@ -986,6 +1062,14 @@ local direct = Cs (
* P(-1) / [[local format = string.format return function(str) return format("%0",str) end]]
)
+-- local direct = Cs (
+-- P("%")
+-- * (S("+- .") + R("09"))^0
+-- * S("sqidfgGeExXo")
+-- * (1-P("%"))^0
+-- * P(-1) / [[local format = string.format return function(str) return format([==[%0]==],str) end]]
+-- )
+
local function make(t,str)
local f
local p
@@ -1058,7 +1142,7 @@ strings.formatters = { }
-- _connector_ is an experiment
-if _LUAVERSION < 5.2 then
+if LUAVERSION < 5.2 then
function strings.formatters.new(noconcat)
local t = { _type_ = "formatter", _connector_ = noconcat and "," or "..", _extensions_ = { }, _preamble_ = preamble, _environment_ = { } }
@@ -1118,7 +1202,7 @@ patterns.luaquoted = Cs(Cc('"') * ((1-S('"\n'))^1 + P('"')/'\\"' + P('\n')/'\\n"
-- escaping by lpeg is faster for strings without quotes, slower on a string with quotes, but
-- faster again when other q-escapables are found (the ones we don't need to escape)
-if _LUAVERSION < 5.2 then
+if LUAVERSION < 5.2 then
add(formatters,"xml",[[lpegmatch(xmlescape,%s)]],"local xmlescape = lpeg.patterns.xmlescape")
add(formatters,"tex",[[lpegmatch(texescape,%s)]],"local texescape = lpeg.patterns.texescape")
@@ -1183,3 +1267,21 @@ local pattern = Cs((newline / (os.newline or "\r") + 1)^0)
function string.replacenewlines(str)
return lpegmatch(pattern,str)
end
+
+--
+
+function strings.newcollector()
+ local result, r = { }, 0
+ return
+ function(fmt,str,...) -- write
+ r = r + 1
+ result[r] = str == nil and fmt or formatters[fmt](str,...)
+ end,
+ function(connector) -- flush
+ if result then
+ local str = concat(result,connector)
+ result, r = { }, 0
+ return str
+ end
+ end
+end