diff options
Diffstat (limited to 'tex/context/base/char-utf.lua')
-rw-r--r-- | tex/context/base/char-utf.lua | 146 |
1 files changed, 26 insertions, 120 deletions
diff --git a/tex/context/base/char-utf.lua b/tex/context/base/char-utf.lua index 273923c36..7dd5d914f 100644 --- a/tex/context/base/char-utf.lua +++ b/tex/context/base/char-utf.lua @@ -1,6 +1,6 @@ if not modules then modules = { } end modules ['char-utf'] = { version = 1.001, - comment = "companion to char-ini.tex", + comment = "companion to char-utf.tex", author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", copyright = "PRAGMA ADE / ConTeXt Development Team", license = "see context related readme files" @@ -19,9 +19,11 @@ in special kinds of output (for instance <l n='pdf'/>).</p> over a string.</p> --ldx]]-- -local concat = table.concat +local utf = unicode.utf8 +local concat, gmatch = table.concat, string.gmatch +local utfcharacters, utfvalues = string.utfcharacters, string.utfvalues -utf = utf or unicode.utf8 +local ctxcatcodes = tex.ctxcatcodes characters = characters or { } characters.graphemes = characters.graphemes or { } @@ -38,17 +40,12 @@ local utfchar, utfbyte, utfgsub = utf.char, utf.byte, utf.gsub --[[ldx-- <p>It only makes sense to collapse at runtime, since we don't expect -source code to depend on collapsing:</p> - -<typing> -characters.filters.utf.collapsing = true -input.filters.utf_translator = characters.filters.utf.collapse -</typing> +source code to depend on collapsing.</p> --ldx]]-- function utffilters.initialize() if utffilters.collapsing and not utffilters.initialized then - for k,v in pairs(characters.data) do + for k,v in next, characters.data do -- using vs and first testing for length is faster (.02->.01 s) local vs = v.specials if vs and #vs == 3 and vs[1] == 'char' then @@ -86,7 +83,7 @@ function utffilters.collapse(str) -- old one utffilters.initialize() end local tokens, first, done = { }, false, false - for second in str:utfcharacters() do + for second in utfcharacters(str) do local cgf = graphemes[first] if cgf and cgf[second] then first, done = cgf[second], true @@ -132,7 +129,7 @@ utffilters.private = { local low = utffilters.private.low local high = utffilters.private.high local escapes = utffilters.private.escapes -local special = "~#$%^&_{}\\" +local special = "~#$%^&_{}\\|" function utffilters.private.set(ch) local cb @@ -154,7 +151,7 @@ function utffilters.private.escape(str) return utfgsub(str,"(.)", escapes) end local set = utffilters.private.set -for ch in special:gmatch(".") do set(ch) end +for ch in gmatch(special,".") do set(ch) end --[[ldx-- <p>We get a more efficient variant of this when we integrate @@ -186,7 +183,7 @@ function utffilters.collapse(str) -- not really tested (we could preallocate a t cf.initialize() end local tokens, first, done, n = { }, false, false, 0 - for second in str:utfcharacters() do + for second in utfcharacters(str) do if done then local crs = cr[second] if crs then @@ -208,7 +205,7 @@ function utffilters.collapse(str) -- not really tested (we could preallocate a t else local crs = cr[second] if crs then - for s in str:utfcharacters() do + for s in utfcharacters(str) do if n == 1 then break else @@ -222,7 +219,7 @@ function utffilters.collapse(str) -- not really tested (we could preallocate a t else local cgf = graphemes[first] if cgf and cgf[second] then - for s in str:utfcharacters() do + for s in utfcharacters(str) do if n == 1 then break else @@ -248,120 +245,29 @@ function utffilters.collapse(str) -- not really tested (we could preallocate a t end --[[ldx-- -<p>In the beginning of <l n='luatex'/> we experimented with a sequence -of filters so that we could manipulate the input stream. However, since -this is a partial solution (not taking macro expansion into account) -and since it may interfere with non-text, we will not use this feature -by default.</p> - -<typing> -utffilters.collapsing = true -characters.filters.append(utffilters.collapse) -characters.filters.activated = true -callback.register('process_input_buffer', characters.filters.process) -</typing> - -<p>The following helper functions may disappear (or become optional) -in the future. Well, they are now.</p> +<p>Next we implement some commands that are used in the user interface.</p> --ldx]]-- ---[[obsolete-- - -characters.filters.sequences = characters.filters.sequences or { } -characters.filters.activated = false - -function characters.filters.append(name) - table.insert(characters.filters.sequences,name) -end - -function characters.filters.prepend(name) - table.insert(characters.filters.sequences,1,name) -end - -function characters.filters.remove(name) - for k,v in ipairs(characters.filters.sequences) do - if v == name then - table.remove(characters.filters.sequences,k) - end - end -end - -function characters.filters.replace(name_1,name_2) - for k,v in ipairs(characters.filters.sequences) do - if v == name_1 then - characters.filters.sequences[k] = name_2 - break - end - end -end - -function characters.filters.insert_before(name_1,name_2) - for k,v in ipairs(characters.filters.sequences) do - if v == name_1 then - table.insert(characters.filters.sequences,k,name_2) - break - end - end -end +commands = commands or { } -function characters.filters.insert_after(name_1,name_2) - for k,v in ipairs(characters.filters.sequences) do - if v == name_1 then - table.insert(characters.filters.sequences,k+1,name_2) - break - end - end +function commands.uchar(first,second) + tex.sprint(ctxcatcodes,utfchar(first*256+second)) end -function characters.filters.list(separator) - concat(characters.filters.sequences,seperator or ' ') -end - -function characters.filters.process(str) - if characters.filters.activated then - for _,v in ipairs(characters.filters.sequences) do - str = v(str) - end - return str - else - return nil -- luatex callback optimalisation - end -end - ---obsolete]]-- - --[[ldx-- -<p>The following code is no longer needed and replaced by token -collectors somehwere else.</p> +<p>A few helpers (used to be <t>luat-uni<t/>).</p> --ldx]]-- ---[[obsolete-- - -characters.filters.collector = { } -characters.filters.collector.data = { } -characters.filters.collector.collecting = false - -function characters.filters.collector.reset() - characters.filters.collector.data = { } -end - -function characters.filters.collector.flush(separator) - tex.sprint(concat(characters.filters.collector.data,separator)) -end - -function characters.filters.collector.prune(n) - for i=1,n do - table.remove(characters.filters.collector.data,-1) +function utf.split(str) + local t = { } + for snippet in utfcharacters(str) do + t[#t+1] = snippet end + return t end -function characters.filters.collector.numerate(str) - if characters.filters.collector.collecting then - table.insert(characters.filters.collector.data,(unicode.utf8.gsub(str,"(.)", function(c) - return ("0x%04X "):format(unicode.utf8.byte(c)) - end))) +function utf.each(str,fnc) + for snippet in utfcharacters(str) do + fnc(snippet) end - return str end - ---obsolete]]-- |