summaryrefslogtreecommitdiff
path: root/lualibs-string.lua
diff options
context:
space:
mode:
authorPhilipp Gesang <philipp.gesang@alumni.uni-heidelberg.de>2012-10-19 19:08:51 +0200
committerPhilipp Gesang <philipp.gesang@alumni.uni-heidelberg.de>2012-10-19 19:08:51 +0200
commitd294bdc989812cc84c07c73d2c63dfae00ef58af (patch)
tree5b2801e97c911fb1d119cc50bd6bf256cfcc1be2 /lualibs-string.lua
parent1456bd5c2c1458bcdee99739ce2255e6b8939768 (diff)
downloadlualibs-d294bdc989812cc84c07c73d2c63dfae00ef58af.tar.gz
update l-string
Diffstat (limited to 'lualibs-string.lua')
-rw-r--r--lualibs-string.lua285
1 files changed, 66 insertions, 219 deletions
diff --git a/lualibs-string.lua b/lualibs-string.lua
index 9856d52..03616aa 100644
--- a/lualibs-string.lua
+++ b/lualibs-string.lua
@@ -6,8 +6,9 @@ if not modules then modules = { } end modules ['l-string'] = {
license = "see context related readme files"
}
+local string = string
local sub, gsub, find, match, gmatch, format, char, byte, rep, lower = string.sub, string.gsub, string.find, string.match, string.gmatch, string.format, string.char, string.byte, string.rep, string.lower
-local lpegmatch = lpeg.match
+local lpegmatch, S, C, Ct = lpeg.match, lpeg.S, lpeg.C, lpeg.Ct
-- some functions may disappear as they are not used anywhere
@@ -15,196 +16,75 @@ if not string.split then
-- this will be overloaded by a faster lpeg variant
- function string:split(pattern)
- if #self > 0 then
- local t = { }
- for s in gmatch(self..pattern,"(.-)"..pattern) do
- t[#t+1] = s
+ function string.split(str,pattern)
+ local t = { }
+ if #str > 0 then
+ local n = 1
+ for s in gmatch(str..pattern,"(.-)"..pattern) do
+ t[n] = s
+ n = n + 1
end
- return t
- else
- return { }
end
+ return t
end
end
-local chr_to_esc = {
- ["%"] = "%%",
- ["."] = "%.",
- ["+"] = "%+", ["-"] = "%-", ["*"] = "%*",
- ["^"] = "%^", ["$"] = "%$",
- ["["] = "%[", ["]"] = "%]",
- ["("] = "%(", [")"] = "%)",
- ["{"] = "%{", ["}"] = "%}"
-}
-
-string.chr_to_esc = chr_to_esc
-
-function string:esc() -- variant 2
- return (gsub(self,"(.)",chr_to_esc))
-end
-
-function string:unquote()
- return (gsub(self,"^([\"\'])(.*)%1$","%2"))
+function string.unquoted(str)
+ return (gsub(str,"^([\"\'])(.*)%1$","%2"))
end
---~ function string:unquote()
---~ if find(self,"^[\'\"]") then
---~ return sub(self,2,-2)
+--~ function stringunquoted(str)
+--~ if find(str,"^[\'\"]") then
+--~ return sub(str,2,-2)
--~ else
---~ return self
+--~ return str
--~ end
--~ end
-function string:quote() -- we could use format("%q")
- return format("%q",self)
+function string.quoted(str)
+ return format("%q",str) -- always "
end
-function string:count(pattern) -- variant 3
+function string.count(str,pattern) -- variant 3
local n = 0
- for _ in gmatch(self,pattern) do
+ for _ in gmatch(str,pattern) do -- not for utf
n = n + 1
end
return n
end
-function string:limit(n,sentinel)
- if #self > n then
- sentinel = sentinel or " ..."
- return sub(self,1,(n-#sentinel)) .. sentinel
+function string.limit(str,n,sentinel) -- not utf proof
+ if #str > n then
+ sentinel = sentinel or "..."
+ return sub(str,1,(n-#sentinel)) .. sentinel
else
- return self
+ return str
end
end
---~ function string:strip() -- the .- is quite efficient
---~ -- return match(self,"^%s*(.-)%s*$") or ""
---~ -- return match(self,'^%s*(.*%S)') or '' -- posted on lua list
---~ return find(s,'^%s*$') and '' or match(s,'^%s*(.*%S)')
---~ end
-
-do -- roberto's variant:
- local space = lpeg.S(" \t\v\n")
- local nospace = 1 - space
- local stripper = space^0 * lpeg.C((space^0 * nospace^1)^0)
- function string.strip(str)
- return lpegmatch(stripper,str) or ""
- end
-end
+local space = S(" \t\v\n")
+local nospace = 1 - space
+local stripper = space^0 * C((space^0 * nospace^1)^0) -- roberto's code
-function string:is_empty()
- return not find(self,"%S")
+function string.strip(str)
+ return lpegmatch(stripper,str) or ""
end
-function string:enhance(pattern,action)
- local ok, n = true, 0
- while ok do
- ok = false
- self = gsub(self,pattern, function(...)
- ok, n = true, n + 1
- return action(...)
- end)
- end
- return self, n
-end
-
-local chr_to_hex, hex_to_chr = { }, { }
-
-for i=0,255 do
- local c, h = char(i), format("%02X",i)
- chr_to_hex[c], hex_to_chr[h] = h, c
-end
-
-function string:to_hex()
- return (gsub(self or "","(.)",chr_to_hex))
-end
-
-function string:from_hex()
- return (gsub(self or "","(..)",hex_to_chr))
-end
-
-if not string.characters then
-
- local function nextchar(str, index)
- index = index + 1
- return (index <= #str) and index or nil, sub(str,index,index)
- end
- function string:characters()
- return nextchar, self, 0
- end
- local function nextbyte(str, index)
- index = index + 1
- return (index <= #str) and index or nil, byte(sub(str,index,index))
- end
- function string:bytes()
- return nextbyte, self, 0
- end
-
-end
-
--- we can use format for this (neg n)
-
-function string:rpadd(n,chr)
- local m = n-#self
- if m > 0 then
- return self .. rep(chr or " ",m)
- else
- return self
- end
-end
-
-function string:lpadd(n,chr)
- local m = n-#self
- if m > 0 then
- return rep(chr or " ",m) .. self
- else
- return self
- end
-end
-
-string.padd = string.rpadd
-
-function is_number(str) -- tonumber
- return find(str,"^[%-%+]?[%d]-%.?[%d+]$") == 1
-end
-
---~ print(is_number("1"))
---~ print(is_number("1.1"))
---~ print(is_number(".1"))
---~ print(is_number("-0.1"))
---~ print(is_number("+0.1"))
---~ print(is_number("-.1"))
---~ print(is_number("+.1"))
-
-function string:split_settings() -- no {} handling, see l-aux for lpeg variant
- if find(self,"=") then
- local t = { }
- for k,v in gmatch(self,"(%a+)=([^%,]*)") do
- t[k] = v
- end
- return t
- else
- return nil
- end
+function string.is_empty(str)
+ return not find(str,"%S")
end
local patterns_escapes = {
- ["-"] = "%-",
- ["."] = "%.",
- ["+"] = "%+",
- ["*"] = "%*",
["%"] = "%%",
- ["("] = "%)",
- [")"] = "%)",
- ["["] = "%[",
- ["]"] = "%]",
+ ["."] = "%.",
+ ["+"] = "%+", ["-"] = "%-", ["*"] = "%*",
+ ["["] = "%[", ["]"] = "%]",
+ ["("] = "%(", [")"] = "%)",
+ -- ["{"] = "%{", ["}"] = "%}"
+ -- ["^"] = "%^", ["$"] = "%$",
}
-function string:pattesc()
- return (gsub(self,".",patterns_escapes))
-end
-
local simple_escapes = {
["-"] = "%-",
["."] = "%.",
@@ -212,77 +92,44 @@ local simple_escapes = {
["*"] = ".*",
}
-function string:simpleesc()
- return (gsub(self,".",simple_escapes))
+function string.escapedpattern(str,simple)
+ return (gsub(str,".",simple and simple_escapes or patterns_escapes))
end
-function string:tohash()
- local t = { }
- for s in gmatch(self,"([^, ]+)") do -- lpeg
- t[s] = true
+function string.topattern(str,lowercase,strict)
+ if str == "" then
+ return ".*"
+ else
+ str = gsub(str,".",simple_escapes)
+ if lowercase then
+ str = lower(str)
+ end
+ if strict then
+ return "^" .. str .. "$"
+ else
+ return str
+ end
end
- return t
end
-local pattern = lpeg.Ct(lpeg.C(1)^0)
-function string:totable()
- return lpegmatch(pattern,self)
+function string.valid(str,default)
+ return (type(str) == "string" and str ~= "" and str) or default or nil
end
---~ local t = {
---~ "1234567123456712345671234567",
---~ "a\tb\tc",
---~ "aa\tbb\tcc",
---~ "aaa\tbbb\tccc",
---~ "aaaa\tbbbb\tcccc",
---~ "aaaaa\tbbbbb\tccccc",
---~ "aaaaaa\tbbbbbb\tcccccc",
---~ }
---~ for k,v do
---~ print(string.tabtospace(t[k]))
---~ end
+-- obsolete names:
-function string.tabtospace(str,tab)
- -- we don't handle embedded newlines
- while true do
- local s = find(str,"\t")
- if s then
- if not tab then tab = 7 end -- only when found
- local d = tab-(s-1) % tab
- if d > 0 then
- str = gsub(str,"\t",rep(" ",d),1)
- else
- str = gsub(str,"\t","",1)
- end
- else
- break
- end
- end
- return str
-end
+string.quote = string.quoted
+string.unquote = string.unquoted
-function string:compactlong() -- strips newlines and leading spaces
- self = gsub(self,"[\n\r]+ *","")
- self = gsub(self,"^ *","")
- return self
-end
+-- handy fallback
-function string:striplong() -- strips newlines and leading spaces
- self = gsub(self,"^%s*","")
- self = gsub(self,"[\n\r]+ *","\n")
- return self
-end
+string.itself = function(s) return s end
-function string:topattern(lowercase,strict)
- if lowercase then
- self = lower(self)
- end
- self = gsub(self,".",simple_escapes)
- if self == "" then
- self = ".*"
- elseif strict then
- self = "^" .. self .. "$"
- end
- return self
+-- also handy (see utf variant)
+
+local pattern = Ct(C(1)^0)
+
+function string.totable(str)
+ return lpegmatch(pattern,str)
end