summaryrefslogtreecommitdiff
path: root/tex/generic
diff options
context:
space:
mode:
Diffstat (limited to 'tex/generic')
-rw-r--r--tex/generic/context/luatex/luatex-fonts-merged.lua83
1 files changed, 73 insertions, 10 deletions
diff --git a/tex/generic/context/luatex/luatex-fonts-merged.lua b/tex/generic/context/luatex/luatex-fonts-merged.lua
index 78606488a..f17a3e045 100644
--- a/tex/generic/context/luatex/luatex-fonts-merged.lua
+++ b/tex/generic/context/luatex/luatex-fonts-merged.lua
@@ -1,6 +1,6 @@
-- merged file : luatex-fonts-merged.lua
-- parent file : luatex-fonts.lua
--- merge date : 08/16/12 22:20:23
+-- merge date : 08/25/12 12:53:08
do -- begin closure to overcome local limits and interference
@@ -1131,6 +1131,8 @@ local lpeg = require("lpeg")
-- tracing (only used when we encounter a problem in integration of lpeg in luatex)
+-- some code will move to unicode and string
+
local report = texio and texio.write_nl or print
-- Watch this: Lua does some juggling with replacement values and although lpeg itself is agnostic of
@@ -1193,7 +1195,7 @@ local byte, char, gmatch, format = string.byte, string.char, string.gmatch, stri
lpeg.patterns = lpeg.patterns or { } -- so that we can share
local patterns = lpeg.patterns
-local P, R, S, V, Ct, C, Cs, Cc = lpeg.P, lpeg.R, lpeg.S, lpeg.V, lpeg.Ct, lpeg.C, lpeg.Cs, lpeg.Cc
+local P, R, S, V, Ct, C, Cs, Cc, Cp = lpeg.P, lpeg.R, lpeg.S, lpeg.V, lpeg.Ct, lpeg.C, lpeg.Cs, lpeg.Cc, lpeg.Cp
local lpegtype, lpegmatch = lpeg.type, lpeg.match
local utfcharacters = string.utfcharacters
@@ -1422,6 +1424,57 @@ function string.utfsplitlines(str)
return lpegmatch(utflinesplitter,str or "")
end
+local utfcharsplitter_ows = utfbom^-1 * Ct(C(utf8char)^0)
+local utfcharsplitter_iws = utfbom^-1 * Ct((whitespace^1 + C(utf8char))^0)
+
+function string.utfsplit(str,ignorewhitespace) -- new
+ if ignorewhitespace then
+ return lpegmatch(utfcharsplitter_iws,str or "")
+ else
+ return lpegmatch(utfcharsplitter_ows,str or "")
+ end
+end
+
+-- inspect(string.utfsplit("a b c d"))
+-- inspect(string.utfsplit("a b c d",true))
+
+-- -- alternative 1: 0.77
+--
+-- local utfcharcounter = utfbom^-1 * Cs((utf8char/'!')^0)
+--
+-- function string.utflength(str)
+-- return #lpegmatch(utfcharcounter,str or "")
+-- end
+--
+-- -- alternative 2: 1.70
+--
+-- local n = 0
+--
+-- local utfcharcounter = utfbom^-1 * (utf8char/function() n = n + 1 end)^0 -- slow
+--
+-- function string.utflength(str)
+-- n = 0
+-- lpegmatch(utfcharcounter,str or "")
+-- return n
+-- end
+--
+-- -- alternative 3: 0.24 (native unicode.utf8.len: 0.047)
+
+local n = 0
+
+local utfcharcounter = utfbom^-1 * Cs ( (
+ Cp() * (lpeg.patterns.utf8one )^1 * Cp() / function(f,t) n = n + t - f end
+ + Cp() * (lpeg.patterns.utf8two )^1 * Cp() / function(f,t) n = n + (t - f)/2 end
+ + Cp() * (lpeg.patterns.utf8three)^1 * Cp() / function(f,t) n = n + (t - f)/3 end
+ + Cp() * (lpeg.patterns.utf8four )^1 * Cp() / function(f,t) n = n + (t - f)/4 end
+)^0 )
+
+function string.utflength(str)
+ n = 0
+ lpegmatch(utfcharcounter,str or "")
+ return n
+end
+
--~ lpeg.splitters = cache -- no longer public
local cache = { }
@@ -1510,33 +1563,43 @@ function lpeg.replacer(one,two)
if type(one) == "table" then
local no = #one
local p
- if no > 0 then
- for i=1,no do
- local o = one[i]
- local pp = P(o[1]) / o[2]
+ if no == 0 then
+ for k, v in next, one do
+ local pp = P(k) / v
if p then
p = p + pp
else
p = pp
end
end
+ return Cs((p + 1)^0)
+ elseif no == 1 then
+ local o = one[1]
+ one, two = P(o[1]), o[2]
+ return Cs(((1-one)^1 + one/two)^0)
else
- for k, v in next, one do
- local pp = P(k) / v
+ for i=1,no do
+ local o = one[i]
+ local pp = P(o[1]) / o[2]
if p then
p = p + pp
else
p = pp
end
end
+ return Cs((p + 1)^0)
end
- return Cs((p + 1)^0)
else
+ one = P(one)
two = two or ""
- return Cs((P(one)/two + 1)^0)
+ return Cs(((1-one)^1 + one/two)^0)
end
end
+-- print(lpeg.match(lpeg.replacer("e","a"),"test test"))
+-- print(lpeg.match(lpeg.replacer{{"e","a"}},"test test"))
+-- print(lpeg.match(lpeg.replacer({ e = "a", t = "x" }),"test test"))
+
local splitters_f, splitters_s = { }, { }
function lpeg.firstofsplit(separator) -- always return value