diff options
author | Marius <mariausol@gmail.com> | 2013-09-18 20:20:28 +0300 |
---|---|---|
committer | Marius <mariausol@gmail.com> | 2013-09-18 20:20:28 +0300 |
commit | c81421a932e43f45d2c7ef7ed6a38411ea53a66c (patch) | |
tree | 1ad9d8366a051b0a887228388e380aafe1813b1d /tex | |
parent | 2f83c855f1b02daf4560b7861e023eaf2716f9e3 (diff) | |
download | context-c81421a932e43f45d2c7ef7ed6a38411ea53a66c.tar.gz |
beta 2013.09.18 19:09
Diffstat (limited to 'tex')
-rw-r--r-- | tex/context/base/char-ini.lua | 1 | ||||
-rw-r--r-- | tex/context/base/cont-new.mkiv | 2 | ||||
-rw-r--r-- | tex/context/base/context-version.pdf | bin | 4107 -> 4108 bytes | |||
-rw-r--r-- | tex/context/base/context.mkiv | 2 | ||||
-rw-r--r-- | tex/context/base/core-uti.lua | 12 | ||||
-rw-r--r-- | tex/context/base/font-odv.lua | 124 | ||||
-rw-r--r-- | tex/context/base/l-lpeg.lua | 5 | ||||
-rw-r--r-- | tex/context/base/l-unicode.lua | 31 | ||||
-rw-r--r-- | tex/context/base/spac-chr.lua | 54 | ||||
-rw-r--r-- | tex/context/base/status-files.pdf | bin | 24726 -> 24750 bytes | |||
-rw-r--r-- | tex/context/base/status-lua.log | 2 | ||||
-rw-r--r-- | tex/context/base/status-mkiv.lua | 33 | ||||
-rw-r--r-- | tex/context/base/task-ini.lua | 8 | ||||
-rw-r--r-- | tex/context/base/util-tab.lua | 4 | ||||
-rw-r--r-- | tex/generic/context/luatex/luatex-fonts-merged.lua | 4 |
15 files changed, 217 insertions, 65 deletions
diff --git a/tex/context/base/char-ini.lua b/tex/context/base/char-ini.lua index 948e51aa0..625996033 100644 --- a/tex/context/base/char-ini.lua +++ b/tex/context/base/char-ini.lua @@ -1196,4 +1196,3 @@ end -- entities.amp = utfchar(characters.activeoffset + utfbyte("&")) -- entities.gt = utfchar(characters.activeoffset + utfbyte(">")) -- end - diff --git a/tex/context/base/cont-new.mkiv b/tex/context/base/cont-new.mkiv index 074c7b778..497397121 100644 --- a/tex/context/base/cont-new.mkiv +++ b/tex/context/base/cont-new.mkiv @@ -11,7 +11,7 @@ %C therefore copyrighted by \PRAGMA. See mreadme.pdf for %C details. -\newcontextversion{2013.09.18 10:06} +\newcontextversion{2013.09.18 19:09} %D This file is loaded at runtime, thereby providing an excellent place for %D hacks, patches, extensions and new features. diff --git a/tex/context/base/context-version.pdf b/tex/context/base/context-version.pdf Binary files differindex 55a71b6ac..bc99742ce 100644 --- a/tex/context/base/context-version.pdf +++ b/tex/context/base/context-version.pdf diff --git a/tex/context/base/context.mkiv b/tex/context/base/context.mkiv index fcf1b85e3..5f49d7857 100644 --- a/tex/context/base/context.mkiv +++ b/tex/context/base/context.mkiv @@ -25,7 +25,7 @@ %D up and the dependencies are more consistent. \edef\contextformat {\jobname} -\edef\contextversion{2013.09.18 10:06} +\edef\contextversion{2013.09.18 19:09} \edef\contextkind {beta} %D For those who want to use this: diff --git a/tex/context/base/core-uti.lua b/tex/context/base/core-uti.lua index 4e3c839bd..b1e9061b3 100644 --- a/tex/context/base/core-uti.lua +++ b/tex/context/base/core-uti.lua @@ -279,6 +279,12 @@ statistics.register("randomizer", function() end end) +-- local kg_per_watt_per_second = 1 / 15000000 +-- local watts_per_core = 50 +-- local speedup_by_other_engine = 1.2 +-- local used_wood_factor = watts_per_core * kg_per_watt_per_second / speedup_by_other_engine +-- local used_wood_factor = (50 / 15000000) / 1.2 + function statistics.formatruntime(runtime) if not environment.initex then -- else error when testing as not counters yet local shipped = texgetcount('nofshipouts') @@ -289,7 +295,13 @@ function statistics.formatruntime(runtime) if shipped > 0 or pages > 0 then local persecond = shipped / runtime if pages == 0 then pages = shipped end +-- if jit then +-- local saved = watts_per_core * runtime * kg_per_watt_per_second / speedup_by_other_engine +-- local saved = used_wood_factor * runtime +-- return format("%s seconds, %i processed pages, %i shipped pages, %.3f pages/second, %f kg tree saved by using luajittex",runtime,pages,shipped,persecond,saved) +-- else return format("%s seconds, %i processed pages, %i shipped pages, %.3f pages/second",runtime,pages,shipped,persecond) +-- end else return format("%s seconds",runtime) end diff --git a/tex/context/base/font-odv.lua b/tex/context/base/font-odv.lua index 4aa150869..164377e80 100644 --- a/tex/context/base/font-odv.lua +++ b/tex/context/base/font-odv.lua @@ -100,21 +100,6 @@ local methods = fonts.analyzers.methods local otffeatures = fonts.constructors.newfeatures("otf") local registerotffeature = otffeatures.register -local fontprocesses = fonts.hashes.processes -local xprocesscharacters = nodes.handlers.characters - -local function processcharacters(head,font) - return xprocesscharacters(head) -end - --- function processcharacters(head,font) --- local processors = fontprocesses[font] --- for i=1,#processors do --- head = processors[i](head,font,0) --- end --- return head, true --- end - local insert_node_after = node.insert_after local copy_node = node.copy local free_node = node.free @@ -138,6 +123,30 @@ local s_pref = states.pref local s_blwf = states.blwf local s_pstf = states.pstf +local replace_all_nbsp = nil + +replace_all_nbsp = function(head) -- delayed definition + replace_all_nbsp = typesetters and typesetters.characters and typesetters.characters.replacenbspaces or function(head) + return head + end + return replace_all_nbsp(head) +end + +local fontprocesses = fonts.hashes.processes +local xprocesscharacters = nodes.handlers.characters + +local function processcharacters(head,font) + return xprocesscharacters(head) +end + +-- function processcharacters(head,font) +-- local processors = fontprocesses[font] +-- for i=1,#processors do +-- head = processors[i](head,font,0) +-- end +-- return head, true +-- end + -- In due time there will be entries here for scripts like Bengali, Gujarati, -- Gurmukhi, Kannada, Malayalam, Oriya, Tamil, Telugu. Feel free to provide the -- code points. @@ -535,15 +544,15 @@ local function deva_initialize(font,attr) end -local function deva_reorder(head,start,stop,font,attr) +local function deva_reorder(head,start,stop,font,attr,nbspaces) local lookuphash, reph, vattu, blwfcache = deva_initialize(font,attr) -- could be inlines but ugly - local current = start - local n = start.next - local base = nil + local current = start + local n = start.next + local base = nil local firstcons = nil - local lastcons = nil + local lastcons = nil local basefound = false if start.char == c_ra and halant[n.char] and reph then @@ -568,8 +577,11 @@ local function deva_reorder(head,start,stop,font,attr) free_node(current) return head, stop else - base, firstcons, lastcons = current, current, current - current = current.next + nbspaces[current] = true + base = current + firstcons = current + lastcons = current + current = current.next if current ~= stop then if nukta[current.char] then current = current.next @@ -861,7 +873,14 @@ local function deva_reorder(head,start,stop,font,attr) end else local char = current.char - if consonant[char] or char == c_nbsp then -- maybe combined hash + if consonant[char] then + cns = current + local next = cns.next + if halant[next.char] then + cns = next + end + elseif char == c_nbsp then + nbspaces[current] = true cns = current local next = cns.next if halant[next.char] then @@ -874,6 +893,7 @@ local function deva_reorder(head,start,stop,font,attr) end if base.char == c_nbsp then + nbspaces[base] = nil head = remove_node(head,base) free_node(base) end @@ -1208,13 +1228,18 @@ end -- this one will be merged into the caller: it saves a call, but we will then make function -- of the actions -local function dev2_reorder(head,start,stop,font,attr) -- maybe do a pass over (determine stop in sweep) +local function dev2_reorder(head,start,stop,font,attr,nbspaces) -- maybe do a pass over (determine stop in sweep) local lookuphash, seqsubset = dev2_initialize(font,attr) - local reph, pre_base_reordering_consonants = false, { } -- was nil ... probably went unnoticed because never assigned - local halfpos, basepos, subpos, postpos = nil, nil, nil, nil - local locl = { } + local pre_base_reordering_consonants = { } -- was nil ... probably went unnoticed because never assigned + + local reph = false -- was nil ... probably went unnoticed because never assigned + local halfpos = nil + local basepos = nil + local subpos = nil + local postpos = nil + local locl = { } for i=1,#seqsubset do @@ -1262,6 +1287,7 @@ local function dev2_reorder(head,start,stop,font,attr) -- maybe do a pass over ( elseif kind == "pref" then -- why not global? pretty ineffient this way -- this will move to the initializer and we will store the hash in dataset + -- todo: reph might also be result of chain for k, v in lookupcache[0x094D], next do pre_base_reordering_consonants[k] = v and v["ligature"] --ToDo: reph might also be result of chain end @@ -1364,15 +1390,17 @@ local function dev2_reorder(head,start,stop,font,attr) -- maybe do a pass over ( current = start.next.next end - if current ~= stop.next and current.char == c_nbsp then - -- Stand Alone cluster + local function action(is_nbsp) if current == stop then stop = stop.prev head = remove_node(head,current) free_node(current) return head, stop else - base = current + if is_nbsp then + nbspaces[current] = true + end + base = current current = current.next if current ~= stop then local char = current.char @@ -1410,6 +1438,14 @@ local function dev2_reorder(head,start,stop,font,attr) -- maybe do a pass over ( end end end + end + + if current ~= stop.next then + -- Stand Alone cluster + stand_alone() + elseif current.char == c_nbsp then + -- Stand Alone cluster + stand_alone(true) else -- not Stand Alone cluster local last = stop.next while current ~= last do -- find base consonant @@ -1573,6 +1609,7 @@ local function dev2_reorder(head,start,stop,font,attr) -- maybe do a pass over ( end if base.char == c_nbsp then + nbspaces[base] = nil head = remove_node(head, base) free_node(base) end @@ -1900,7 +1937,10 @@ end -- a lot. Common code has been synced. function methods.deva(head,font,attr) - local current, start, done = head, true, false + local current = head + local start = true + local done = false + local nbspaces = { } while current do if current.id == glyph_code and current.subtype<256 and current.font == font then done = true @@ -1932,7 +1972,7 @@ function methods.deva(head,font,attr) local syllableend = analyze_next_chars_one(c,font,2) current = syllableend.next if syllablestart ~= syllableend then - head, current = deva_reorder(head,syllablestart,syllableend,font,attr) + head, current = deva_reorder(head,syllablestart,syllableend,font,attr,nbspaces) current = current.next end else @@ -2041,7 +2081,7 @@ function methods.deva(head,font,attr) end end if syllablestart ~= syllableend then - head, current = deva_reorder(head,syllablestart,syllableend,font,attr) + head, current = deva_reorder(head,syllablestart,syllableend,font,attr,nbspaces) current = current.next end elseif independent_vowel[char] then @@ -2080,6 +2120,9 @@ function methods.deva(head,font,attr) start = false end + if next(nbspaces) then + head = replace_all_nbsp(head,nbspaces) + end return head, done end @@ -2089,10 +2132,11 @@ end -- handler(head,start,kind,lookupname,lookupmatch,sequence,lookuphash,1) function methods.dev2(head,font,attr) - local current = head - local start = true - local done = false - local syllabe = 0 + local current = head + local start = true + local done = false + local syllabe = 0 + local nbspaces = { } while current do local syllablestart, syllableend = nil, nil if current.id == glyph_code and current.subtype<256 and current.font == font then @@ -2114,6 +2158,7 @@ function methods.dev2(head,font,attr) else local standalone = char == c_nbsp if standalone then + nbspaces[current] = true local p = current.prev if not p then -- begin of paragraph or box @@ -2148,7 +2193,7 @@ function methods.dev2(head,font,attr) end end if syllableend and syllablestart ~= syllableend then - head, current = dev2_reorder(head,syllablestart,syllableend,font,attr) + head, current = dev2_reorder(head,syllablestart,syllableend,font,attr,nbspaces) end if not syllableend and current.id == glyph_code and current.subtype<256 and current.font == font and not current[a_state] then local mark = mark_four[current.char] @@ -2159,6 +2204,9 @@ function methods.dev2(head,font,attr) start = false current = current.next end + if next(nbspaces) then + head = replace_all_nbsp(head,nbspaces) + end return head, done end diff --git a/tex/context/base/l-lpeg.lua b/tex/context/base/l-lpeg.lua index cafa18a38..58c552419 100644 --- a/tex/context/base/l-lpeg.lua +++ b/tex/context/base/l-lpeg.lua @@ -82,7 +82,6 @@ setinspector(function(v) if lpegtype(v) then lpegprint(v) return true end end) lpeg.patterns = lpeg.patterns or { } -- so that we can share local patterns = lpeg.patterns - local anything = P(1) local endofstring = P(-1) local alwaysmatched = P(true) @@ -120,6 +119,9 @@ local utfbom = utfbom_32_be + utfbom_32_le local utftype = utfbom_32_be * Cc("utf-32-be") + utfbom_32_le * Cc("utf-32-le") + utfbom_16_be * Cc("utf-16-be") + utfbom_16_le * Cc("utf-16-le") + utfbom_8 * Cc("utf-8") + alwaysmatched * Cc("utf-8") -- assume utf8 +local utfstricttype = utfbom_32_be * Cc("utf-32-be") + utfbom_32_le * Cc("utf-32-le") + + utfbom_16_be * Cc("utf-16-be") + utfbom_16_le * Cc("utf-16-le") + + utfbom_8 * Cc("utf-8") local utfoffset = utfbom_32_be * Cc(4) + utfbom_32_le * Cc(4) + utfbom_16_be * Cc(2) + utfbom_16_le * Cc(2) + utfbom_8 * Cc(3) + Cc(0) @@ -141,6 +143,7 @@ patterns.utf8three = R("\224\239") * utf8next * utf8next patterns.utf8four = R("\240\244") * utf8next * utf8next * utf8next patterns.utfbom = utfbom patterns.utftype = utftype +patterns.utfstricttype = utfstricttype patterns.utfoffset = utfoffset local utf8char = patterns.utf8one + patterns.utf8two + patterns.utf8three + patterns.utf8four diff --git a/tex/context/base/l-unicode.lua b/tex/context/base/l-unicode.lua index 3ce5bd3a7..7ada394d5 100644 --- a/tex/context/base/l-unicode.lua +++ b/tex/context/base/l-unicode.lua @@ -38,13 +38,14 @@ local replacer = lpeg.replacer local utfvalues = utf.values local utfgmatch = utf.gmatch -- not always present -local p_utftype = patterns.utftype -local p_utfoffset = patterns.utfoffset -local p_utf8char = patterns.utf8char -local p_utf8byte = patterns.utf8byte -local p_utfbom = patterns.utfbom -local p_newline = patterns.newline -local p_whitespace = patterns.whitespace +local p_utftype = patterns.utftype +local p_utfstricttype = patterns.utfstricttype +local p_utfoffset = patterns.utfoffset +local p_utf8char = patterns.utf8char +local p_utf8byte = patterns.utf8byte +local p_utfbom = patterns.utfbom +local p_newline = patterns.newline +local p_whitespace = patterns.whitespace if not unicode then @@ -976,6 +977,22 @@ function utf.xstring(s) return format("0x%05X",type(s) == "number" and s or utfbyte(s)) end +function utf.toeight(str) + if not str then + return nil + end + local utftype = lpegmatch(p_utfstricttype,str) + if utftype == "utf-8" then + return sub(str,4) + elseif utftype == "utf-16-le" then + return utf16_to_utf8_le(str) + elseif utftype == "utf-16-be" then + return utf16_to_utf8_ne(str) + else + return str + end +end + -- local p_nany = p_utf8char / "" diff --git a/tex/context/base/spac-chr.lua b/tex/context/base/spac-chr.lua index f3c62bb77..03c60d275 100644 --- a/tex/context/base/spac-chr.lua +++ b/tex/context/base/spac-chr.lua @@ -14,6 +14,8 @@ local byte, lower = string.byte, string.lower -- to be redone: characters will become tagged spaces instead as then we keep track of -- spaceskip etc +local next = next + trace_characters = false trackers.register("typesetters.characters", function(v) trace_characters = v end) report_characters = logs.reporter("typesetting","characters") @@ -41,7 +43,7 @@ local chardata = characters.data local typesetters = typesetters -local characters = { } +local characters = typesetters.characters or { } -- can be predefined typesetters.characters = characters local fonthashes = fonts.hashes @@ -93,20 +95,52 @@ local function inject_nobreak_space(unicode,head,current,space,spacestretch,spac return head, current end +local keepnbspbefore = { + [0x094D] = true, -- category mn + [0x0CCD] = true, +} + +characters.keepnbspbefore = keepnbspbefore -- so we can extend + +local function nbsp(head,current) + local para = fontparameters[current.font] + if current[a_alignstate] == 1 then -- flushright + head, current = inject_nobreak_space(0x00A0,head,current,para.space,0,0) + current.subtype = space_skip_code + else + head, current = inject_nobreak_space(0x00A0,head,current,para.space,para.spacestretch,para.spaceshrink) + end + return head, current +end + +-- assumes nuts or nodes, depending on callers .. so no tonuts here + +function characters.replacenbsp(head,current) + head, current = nbsp(head,current) + head, current = remove_node(head,current,true) + return head, current +end + +function characters.replacenbspaces(head,nbspaces) + for current in next, nbspaces do + head, current = nbsp(head,current) + head, current = remove_node(head,current,true) + end + return head +end + local methods = { -- The next one uses an attribute assigned to the character but still we -- don't have the 'local' value. [0x00A0] = function(head,current) -- nbsp - local para = fontparameters[current.font] - if current[a_alignstate] == 1 then -- flushright - head, current = inject_nobreak_space(0x00A0,head,current,para.space,0,0) - current.subtype = space_skip_code + local next = current.next + if next and next.id == glyph_code and keepnbspbefore[next.char] then + return false else - head, current = inject_nobreak_space(0x00A0,head,current,para.space,para.spacestretch,para.spaceshrink) + return nbsp(head,current) end - return head, current end, [0x2000] = function(head,current) -- enquad @@ -184,8 +218,10 @@ function characters.handler(head) if trace_characters then report_characters("replacing character %C, description %a",char,lower(chardata[char].description)) end - head = method(head,current) - head = remove_node(head,current,true) + local h = method(head,current) + if h then + head = remove_node(h,current,true) + end done = true end current = next diff --git a/tex/context/base/status-files.pdf b/tex/context/base/status-files.pdf Binary files differindex ff9b9f988..5ca575b44 100644 --- a/tex/context/base/status-files.pdf +++ b/tex/context/base/status-files.pdf diff --git a/tex/context/base/status-lua.log b/tex/context/base/status-lua.log index 771f67ad7..10358a1a5 100644 --- a/tex/context/base/status-lua.log +++ b/tex/context/base/status-lua.log @@ -1,6 +1,6 @@ (cont-yes.mkiv -ConTeXt ver: 2013.09.18 10:06 MKIV beta fmt: 2013.9.18 int: english/english +ConTeXt ver: 2013.09.18 19:09 MKIV beta fmt: 2013.9.18 int: english/english system > 'cont-new.mkiv' loaded (cont-new.mkiv) diff --git a/tex/context/base/status-mkiv.lua b/tex/context/base/status-mkiv.lua index 75ec24b99..349c34cdc 100644 --- a/tex/context/base/status-mkiv.lua +++ b/tex/context/base/status-mkiv.lua @@ -1686,6 +1686,12 @@ return { }, { category = "mkiv", + filename = "meta-fnt", + loading = "always", + status = "okay", + }, + { + category = "mkiv", filename = "meta-tex", loading = "always", status = "okay", @@ -3860,6 +3866,12 @@ return { }, { category = "lua", + filename = "meta-fnt", + loading = "meta-fnt", + status = "okay", + }, + { + category = "lua", comment = "could be done nicer nowadays but who needs it", filename = "meta-pdf", loading = "meta-pdf", @@ -4764,10 +4776,31 @@ return { { category = "lua", filename = "typo-dir", + loading = "typo-dir", + status = "okay", + }, + { + category = "lua", + comment = "work in progress", + filename = "typo-dha", + loading = "typo-dir", status = "todo", }, { category = "lua", + filename = "typo-dua", + loading = "typo-dir", + status = "okay", + }, + { + category = "lua", + comment = "work in progress", + filename = "typo-dub", + loading = "typo-dir", + status = "okay", + }, + { + category = "lua", filename = "typo-ini", status = "todo", }, diff --git a/tex/context/base/task-ini.lua b/tex/context/base/task-ini.lua index 4390a4521..1022483a0 100644 --- a/tex/context/base/task-ini.lua +++ b/tex/context/base/task-ini.lua @@ -32,10 +32,10 @@ appendaction("processors", "characters", "typesetters.breakpoints.handler") appendaction("processors", "characters", "scripts.injectors.handler") -- disabled appendaction("processors", "words", "builders.kernel.hyphenation") -- always on -appendaction("processors", "words", "languages.words.check") -- disabled +appendaction("processors", "words", "languages.words.check") -- disabled -- might move up, no disc check needed then -appendaction("processors", "words", "typesetters.initials.handler") -- disabled -appendaction("processors", "words", "typesetters.firstlines.handler") -- disabled +appendaction("processors", "words", "typesetters.initials.handler") -- disabled -- might move up +appendaction("processors", "words", "typesetters.firstlines.handler") -- disabled -- might move up appendaction("processors", "fonts", "builders.paragraphs.solutions.splitters.split") -- experimental appendaction("processors", "fonts", "nodes.handlers.characters") -- maybe todo @@ -50,7 +50,7 @@ appendaction("processors", "lists", "typesetters.spacings.handler") appendaction("processors", "lists", "typesetters.kerns.handler") -- disabled appendaction("processors", "lists", "typesetters.digits.handler") -- disabled (after otf handling) appendaction("processors", "lists", "typesetters.italics.handler") -- disabled (after otf/kern handling) -------------("processors", "lists", "typesetters.initials.handler") -- disabled +------------("processors", "lists", "typesetters.initials.handler") -- disabled appendaction("shipouts", "normalizers", "nodes.handlers.cleanuppage") -- disabled appendaction("shipouts", "normalizers", "typesetters.alignments.handler") diff --git a/tex/context/base/util-tab.lua b/tex/context/base/util-tab.lua index f18c719e4..06c374c67 100644 --- a/tex/context/base/util-tab.lua +++ b/tex/context/base/util-tab.lua @@ -10,13 +10,14 @@ utilities = utilities or {} utilities.tables = utilities.tables or { } local tables = utilities.tables -local format, gmatch, gsub = string.format, string.gmatch, string.gsub +local format, gmatch, gsub, sub = string.format, string.gmatch, string.gsub, string.sub local concat, insert, remove = table.concat, table.insert, table.remove local setmetatable, getmetatable, tonumber, tostring = setmetatable, getmetatable, tonumber, tostring local type, next, rawset, tonumber, tostring, load, select = type, next, rawset, tonumber, tostring, load, select local lpegmatch, P, Cs, Cc = lpeg.match, lpeg.P, lpeg.Cs, lpeg.Cc local sortedkeys, sortedpairs = table.sortedkeys, table.sortedpairs local formatters = string.formatters +local utftoeight = utf.toeight local splitter = lpeg.tsplitat(".") @@ -375,6 +376,7 @@ function table.load(filename,loader) if filename then local t = (loader or io.loaddata)(filename) if t and t ~= "" then + local t = utftoeight(t) t = load(t) if type(t) == "function" then t = t() diff --git a/tex/generic/context/luatex/luatex-fonts-merged.lua b/tex/generic/context/luatex/luatex-fonts-merged.lua index 9e311c6b8..b868d9ac3 100644 --- a/tex/generic/context/luatex/luatex-fonts-merged.lua +++ b/tex/generic/context/luatex/luatex-fonts-merged.lua @@ -1,6 +1,6 @@ -- merged file : luatex-fonts-merged.lua -- parent file : luatex-fonts.lua --- merge date : 09/18/13 10:06:34 +-- merge date : 09/18/13 19:09:33 do -- begin closure to overcome local limits and interference @@ -134,6 +134,7 @@ local utfbom_16_le=P('\255\254') local utfbom_8=P('\239\187\191') local utfbom=utfbom_32_be+utfbom_32_le+utfbom_16_be+utfbom_16_le+utfbom_8 local utftype=utfbom_32_be*Cc("utf-32-be")+utfbom_32_le*Cc("utf-32-le")+utfbom_16_be*Cc("utf-16-be")+utfbom_16_le*Cc("utf-16-le")+utfbom_8*Cc("utf-8")+alwaysmatched*Cc("utf-8") +local utfstricttype=utfbom_32_be*Cc("utf-32-be")+utfbom_32_le*Cc("utf-32-le")+utfbom_16_be*Cc("utf-16-be")+utfbom_16_le*Cc("utf-16-le")+utfbom_8*Cc("utf-8") local utfoffset=utfbom_32_be*Cc(4)+utfbom_32_le*Cc(4)+utfbom_16_be*Cc(2)+utfbom_16_le*Cc(2)+utfbom_8*Cc(3)+Cc(0) local utf8next=R("\128\191") patterns.utfbom_32_be=utfbom_32_be @@ -149,6 +150,7 @@ patterns.utf8three=R("\224\239")*utf8next*utf8next patterns.utf8four=R("\240\244")*utf8next*utf8next*utf8next patterns.utfbom=utfbom patterns.utftype=utftype +patterns.utfstricttype=utfstricttype patterns.utfoffset=utfoffset local utf8char=patterns.utf8one+patterns.utf8two+patterns.utf8three+patterns.utf8four local validutf8char=utf8char^0*endofstring*Cc(true)+Cc(false) |