summary refs log tree commit diff
path: root/lualibs-string.lua
blob: 03616aa19df6ecedf27aa7d0775b18d71d723514 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
-- Register this file's metadata under the 'l-string' key of the global
-- `modules` table; the table itself is created first when it does not exist.
if not modules then modules = { } end modules ['l-string'] = {
    version   = 1.001,
    comment   = "companion to luat-lib.mkiv",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files"
}

local string = string
local sub, gsub, find, match, gmatch, format, char, byte, rep, lower = string.sub, string.gsub, string.find, string.match, string.gmatch, string.format, string.char, string.byte, string.rep, string.lower
local lpegmatch, S, C, Ct = lpeg.match, lpeg.S, lpeg.C, lpeg.Ct

-- some functions may disappear as they are not used anywhere

if not string.split then

    -- this will be overloaded by a faster lpeg variant

    -- Split `str` on every match of the Lua pattern `pattern` and return the
    -- pieces as an array.
    --
    --   str     : string to split
    --   pattern : Lua pattern describing the separator
    --   returns : table with the fields in order; an empty `str` gives { },
    --             a trailing separator gives a trailing "" field
    --
    -- The subject is searched as-is (nothing is appended to it), so real
    -- patterns such as "%s+" work and a separator can never match across an
    -- artificial boundary. Matches of zero length are not treated as
    -- separators; they are skipped so that the loop always makes progress.

    function string.split(str,pattern)
        local t = { }
        local len = #str
        if len > 0 then
            local n      = 0
            local start  = 1 -- first byte of the field being collected
            local search = 1 -- position where the next separator search begins
            while search <= len do
                local first, last = find(str,pattern,search)
                if not first then
                    break
                elseif last < first then
                    -- empty match: no separator here, look one byte further
                    search = first + 1
                else
                    n = n + 1
                    t[n] = sub(str,start,first-1)
                    start  = last + 1
                    search = start
                end
            end
            -- whatever follows the last separator (possibly "")
            t[n+1] = sub(str,start)
        end
        return t
    end

end

-- Remove one pair of matching outer quotes (either " or ') from `str`.
-- A string that is not wrapped in the same quote character at both ends is
-- returned unchanged. Only the resulting string is returned, not the
-- substitution count.
function string.unquoted(str)
    local stripped = gsub(str,"^([\"\'])(.*)%1$","%2")
    return stripped
end

--~ function stringunquoted(str)
--~     if find(str,"^[\'\"]") then
--~         return sub(str,2,-2)
--~     else
--~         return str
--~     end
--~ end

-- Return `str` formatted with the "%q" directive of string.format, i.e.
-- wrapped in double quotes (never single quotes) with escapes applied.
function string.quoted(str)
    local quoted = format("%q",str) -- always "
    return quoted
end

-- Count how often the Lua pattern `pattern` matches in `str`.
-- Matching is done by string.gmatch, so it works on bytes (not for utf).
function string.count(str,pattern) -- variant 3
    local nextmatch = gmatch(str,pattern)
    local total = 0
    -- the iterator yields nil once there are no further matches
    while nextmatch() ~= nil do
        total = total + 1
    end
    return total
end

-- Shorten `str` to at most `n` bytes, marking the cut with `sentinel`.
--
--   str      : string to shorten
--   n        : maximum length in bytes (not utf proof)
--   sentinel : marker appended to a shortened string, defaults to "..."
--   returns  : `str` itself when it already fits; otherwise the leading part
--              of `str` followed by `sentinel`, together `n` bytes long
--
-- When the sentinel alone is longer than `n` there is no room for it, so the
-- string is simply cut at `n` bytes (nothing is kept for n <= 0). Without
-- this guard the negative end index would make string.sub count from the end
-- of `str`, yielding a result longer than the input.
function string.limit(str,n,sentinel) -- not utf proof
    if #str > n then
        sentinel = sentinel or "..."
        local keep = n - #sentinel
        if keep < 0 then
            return sub(str,1,n > 0 and n or 0)
        end
        return sub(str,1,keep) .. sentinel
    else
        return str
    end
end

-- characters that string.strip treats as whitespace: space, tab, vertical
-- tab and newline
local space    = S(" \t\v\n")
local nospace  = 1 - space
-- skip leading whitespace, then capture a sequence of non-whitespace runs,
-- each optionally preceded by whitespace; whitespace that is not followed by
-- a non-whitespace character therefore stays outside the capture
-- (roberto's code)
local stripper = space^0 * C((space^0 * nospace^1)^0)

-- Return `str` without leading and trailing whitespace; "" is returned when
-- the match yields nothing.
function string.strip(str)
    local stripped = lpegmatch(stripper,str)
    if stripped then
        return stripped
    end
    return ""
end

-- True when `str` contains no character matched by "%S", i.e. it is empty or
-- consists of whitespace only.
function string.is_empty(str)
    local firstvisible = find(str,"%S")
    return firstvisible == nil
end

-- Replacement map that turns magic pattern characters into escaped literals.
-- NOTE(review): "?", "^" and "$" are passed through unescaped (the latter two
-- are commented out below) -- confirm that this is intended.
local patterns_escapes = {
    ["%"] = "%%",
    ["."] = "%.",
    ["+"] = "%+",
    ["-"] = "%-",
    ["*"] = "%*",
    ["["] = "%[",
    ["]"] = "%]",
    ["("] = "%(",
    [")"] = "%)",
 -- ["{"] = "%{", ["}"] = "%}"
 -- ["^"] = "%^", ["$"] = "%$",
}

-- Replacement map for wildcard style input: "-" and "." become literals,
-- "?" becomes "." and "*" becomes ".*".
local simple_escapes = {
    ["-"] = "%-",
    ["."] = "%.",
    ["?"] = ".",
    ["*"] = ".*",
}

-- Rewrite `str` character by character using one of the maps above:
-- `simple_escapes` when `simple` is set, `patterns_escapes` otherwise.
-- Characters without an entry in the chosen map are kept as they are.
function string.escapedpattern(str,simple)
    local escapes = patterns_escapes
    if simple then
        escapes = simple_escapes
    end
    local escaped = gsub(str,".",escapes)
    return escaped
end

-- Turn the wildcard style string `str` into a Lua pattern.
--
--   str       : input; "" yields ".*" regardless of the other arguments
--   lowercase : when set, the resulting pattern is lowercased
--   strict    : when set, the pattern is wrapped in "^" and "$"
--
-- The conversion itself is a per character lookup in `simple_escapes`.
function string.topattern(str,lowercase,strict)
    if str == "" then
        return ".*"
    end
    local result = gsub(str,".",simple_escapes)
    if lowercase then
        result = lower(result)
    end
    if not strict then
        return result
    end
    return "^" .. result .. "$"
end


-- Return `str` when it is a non-empty string; in every other case return
-- `default`, or nil when `default` is nil or false.
function string.valid(str,default)
    if type(str) == "string" and str ~= "" then
        return str
    end
    return default or nil
end

-- obsolete names, kept as aliases of string.quoted and string.unquoted:

string.quote   = string.quoted
string.unquote = string.unquoted

-- handy fallback: hands back its first argument untouched

function string.itself(s)
    return s
end

-- also handy (see utf variant)

-- collects every single character matched by lpeg's `1` into one table
local pattern = Ct(C(1)^0)

-- Return a table holding the characters of `str` in order, one entry per
-- match of `1` (presumably bytes rather than utf characters, given the
-- reference to a separate utf variant -- confirm).
function string.totable(str)
    local characters = lpegmatch(pattern,str)
    return characters
end