diff options
Diffstat (limited to 'tex')
23 files changed, 1007 insertions, 763 deletions
diff --git a/tex/context/base/char-def.lua b/tex/context/base/char-def.lua index 0e1d8778e..f30e82898 100644 --- a/tex/context/base/char-def.lua +++ b/tex/context/base/char-def.lua @@ -2389,6 +2389,7 @@ characters.data={    direction="l",    linebreak="al",    uccode={ 0x53, 0x53 }, +  shcode={ 0x73, 0x73 },    unicodeslot=0xDF,   },   { @@ -214783,4 +214784,4 @@ characters.data={    linebreak="cm",    unicodeslot=0xE01EF,   }, -}
\ No newline at end of file +} diff --git a/tex/context/base/char-enc.lua b/tex/context/base/char-enc.lua index 048837eec..c2061891a 100644 --- a/tex/context/base/char-enc.lua +++ b/tex/context/base/char-enc.lua @@ -9,6 +9,8 @@ if not modules then modules = { } end modules ['char-enc'] = {  -- Thanks to tex4ht for these mappings. +local next = next +  local allocate, setinitializer = utilities.storage.allocate, utilities.storage.setinitializer  characters       = characters or { } @@ -169,7 +171,10 @@ characters.synonyms = allocate { -- afm mess  -- that table.print would not work on this file unless it is accessed once. This  -- why the serializer does a dummy access. -local enccodes = allocate()  characters.enccodes = enccodes +local enccodes      = allocate() +characters.enccodes = enccodes + + -- maybe omit context name -> then same as encodings.make_unicode_vector  local function initialize()      for unicode, data in next, characters.data do @@ -179,7 +184,9 @@ local function initialize()          end      end      for name, unicode in next, characters.synonyms do -        if not enccodes[name] then enccodes[name] = unicode end +        if not enccodes[name] then +            enccodes[name] = unicode +        end      end  end diff --git a/tex/context/base/char-fio.lua b/tex/context/base/char-fio.lua new file mode 100644 index 000000000..766ea7123 --- /dev/null +++ b/tex/context/base/char-fio.lua @@ -0,0 +1,56 @@ +if not modules then modules = { } end modules ['char-fio'] = { +    version   = 1.001, +    comment   = "companion to char-ini.mkiv", +    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL", +    copyright = "PRAGMA ADE / ConTeXt Development Team", +    license   = "see context related readme files" +} + +-- -- + +local sequencers      = utilities.sequencers +local appendaction    = sequencers.appendaction +local enableaction    = sequencers.enableaction +local disableaction   = sequencers.disableaction + +local utffilters      = 
characters.filters.utf + +local textfileactions = resolvers.openers.helpers.textfileactions +local textlineactions = resolvers.openers.helpers.textlineactions + +appendaction (textfileactions,"system","characters.filters.utf.reorder") +disableaction(textfileactions,         "characters.filters.utf.reorder") + +appendaction (textlineactions,"system","characters.filters.utf.reorder") +disableaction(textlineactions,         "characters.filters.utf.reorder") + +appendaction (textfileactions,"system","characters.filters.utf.collapse") +disableaction(textfileactions,         "characters.filters.utf.collapse") + +appendaction (textfileactions,"system","characters.filters.utf.decompose") +disableaction(textfileactions,         "characters.filters.utf.decompose") + +function characters.filters.utf.enable() +    enableaction(textfileactions,"characters.filters.utf.reorder") +    enableaction(textfileactions,"characters.filters.utf.collapse") +    enableaction(textfileactions,"characters.filters.utf.decompose") +end + +local function configure(what,v) +    if not v then +        disableaction(textfileactions,what) +        disableaction(textlineactions,what) +    elseif v == "line" then +        disableaction(textfileactions,what) +        enableaction (textlineactions,what) +    else -- true or text +        enableaction (textfileactions,what) +        disableaction(textlineactions,what) +    end +end + +directives.register("filters.utf.reorder",   function(v) configure("characters.filters.utf.reorder",  v) end) +directives.register("filters.utf.collapse",  function(v) configure("characters.filters.utf.collapse", v) end) +directives.register("filters.utf.decompose", function(v) configure("characters.filters.utf.decompose",v) end) + +utffilters.setskippable { "mkiv", "mkvi", "mkix", "mkxi" } diff --git a/tex/context/base/char-ini.lua b/tex/context/base/char-ini.lua index eb73cc19e..a2505c0eb 100644 --- a/tex/context/base/char-ini.lua +++ b/tex/context/base/char-ini.lua @@ 
-7,26 +7,33 @@ if not modules then modules = { } end modules ['char-ini'] = {  }  -- todo: make two files, one for format generation, one for format use +-- todo: move some to char-utf  -- we can remove the tag range starting at 0xE0000 (special applications)  local utfchar, utfbyte, utfvalues, ustring, utotable = utf.char, utf.byte, utf.values, utf.ustring, utf.totable  local concat, unpack, tohash = table.concat, table.unpack, table.tohash  local next, tonumber, type, rawget, rawset = next, tonumber, type, rawget, rawset -local format, lower, gsub, match, gmatch = string.format, string.lower, string.gsub, string.match, string.match, string.gmatch -local P, R, Cs, lpegmatch, patterns = lpeg.P, lpeg.R, lpeg.Cs, lpeg.match, lpeg.patterns +local format, lower, gsub = string.format, string.lower, string.gsub +local P, R, S, Cs = lpeg.P, lpeg.R, lpeg.S, lpeg.Cs -local utf8byte          = patterns.utf8byte -local utf8char          = patterns.utf8char +if not characters then require("char-def") end -local allocate          = utilities.storage.allocate -local mark              = utilities.storage.mark +local lpegpatterns          = lpeg.patterns +local lpegmatch             = lpeg.match +local utf8byte              = lpegpatterns.utf8byte +local utf8char              = lpegpatterns.utf8char -local setmetatableindex = table.setmetatableindex +local utfchartabletopattern = lpeg.utfchartabletopattern -local trace_defining    = false  trackers.register("characters.defining", function(v) characters_defining = v end) +local allocate              = utilities.storage.allocate +local mark                  = utilities.storage.mark -local report_defining   = logs.reporter("characters") +local setmetatableindex     = table.setmetatableindex + +local trace_defining        = false  trackers.register("characters.defining", function(v) characters_defining = v end) + +local report_defining       = logs.reporter("characters")  --[[ldx--  <p>This module implements some methods and creates 
additional datastructured @@ -60,7 +67,7 @@ end  local pattern = (P("0x") + P("U+")) * ((R("09","AF")^1 * P(-1)) / function(s) return tonumber(s,16) end) -patterns.chartonumber = pattern +lpegpatterns.chartonumber = pattern  local function chartonumber(k)      if type(k) == "string" then @@ -420,13 +427,15 @@ setmetatableindex(otfscripts,function(t,unicode)      return "dflt"  end) +local splitter = lpeg.splitat(S(":-")) +  function characters.getrange(name) -- used in font fallback definitions (name or range)      local range = blocks[name]      if range then          return range.first, range.last, range.description, range.gaps      end      name = gsub(name,'"',"0x") -- goodie: tex hex notation -    local start, stop = match(name,"^(.-)[%-%:](.-)$") +    local start, stop = lpegmatch(splitter,name)      if start and stop then          start, stop = tonumber(start,16) or tonumber(start), tonumber(stop,16) or tonumber(stop)          if start and stop then @@ -870,17 +879,92 @@ end  ----- toupper = Cs((utf8byte/ucchars)^0)  ----- toshape = Cs((utf8byte/shchars)^0) -local tolower = Cs((utf8char/lcchars)^0) -local toupper = Cs((utf8char/ucchars)^0) -local toshape = Cs((utf8char/shchars)^0) - -patterns.tolower = tolower -patterns.toupper = toupper -patterns.toshape = toshape +local tolower = Cs((utf8char/lcchars)^0) -- no need to check spacing +local toupper = Cs((utf8char/ucchars)^0) -- no need to check spacing +local toshape = Cs((utf8char/shchars)^0) -- no need to check spacing + +lpegpatterns.tolower = tolower +lpegpatterns.toupper = toupper +lpegpatterns.toshape = toshape + +-- function characters.lower (str) return lpegmatch(tolower,str) end +-- function characters.upper (str) return lpegmatch(toupper,str) end +-- function characters.shaped(str) return lpegmatch(toshape,str) end + +local lhash = { } +local uhash = { } +local shash = { } + +for k, v in next, characters.data do + -- if k < 0x11000 then +        local l = v.lccode +        if l then +            if 
type(l) == "number" then +                lhash[utfchar(k)] = utfchar(l) +            elseif #l == 2 then +                lhash[utfchar(k)] = utfchar(l[1]) .. utfchar(l[2]) +            else +                inspect(v) +            end +        else +            local u = v.uccode +            if u then +                if type(u) == "number" then +                    uhash[utfchar(k)] = utfchar(u) +                elseif #u == 2 then +                    uhash[utfchar(k)] = utfchar(u[1]) .. utfchar(u[2]) +                else +                    inspect(v) +                end +            end +        end +        local s = v.shcode +        if s then +            if type(s) == "number" then +                shash[utfchar(k)] = utfchar(s) +            elseif #s == 2 then +                shash[utfchar(k)] = utfchar(s[1]) .. utfchar(s[2]) +            else +                inspect(v) +            end +        end + -- end +end -function characters.lower (str) return lpegmatch(tolower,str) end -function characters.upper (str) return lpegmatch(toupper,str) end -function characters.shaped(str) return lpegmatch(toshape,str) end +local utf8lower = Cs((utfchartabletopattern(lhash) / lhash + utf8char)^0) +local utf8upper = Cs((utfchartabletopattern(uhash) / uhash + utf8char)^0) +local utf8shape = Cs((utfchartabletopattern(shash) / shash + utf8char)^0) + +lpegpatterns.utf8lower = utf8lower +lpegpatterns.utf8upper = utf8upper +lpegpatterns.utf8shape = utf8shape + +function characters.lower (str) return lpegmatch(utf8lower,str) end +function characters.upper (str) return lpegmatch(utf8upper,str) end +function characters.shaped(str) return lpegmatch(utf8shape,str) end + +-- local str = [[ +--     ÀÁÂÃÄÅàáâãäå àáâãäåàáâãäå ÀÁÂÃÄÅÀÁÂÃÄÅ AAAAAAaaaaaa +--     ÆÇæç         æçæç         ÆÇÆÇ         AECaec +--     ÈÉÊËèéêë     èéêëèéêë     ÈÉÊËÈÉÊË     EEEEeeee +--     ÌÍÎÏÞìíîïþ   ìíîïþìíîïþ   ÌÍÎÏÞÌÍÎÏÞ   IIIIÞiiiiþ +--     Ðð           ðð           ÐÐ           Ðð +--     Ññ  
         ññ           ÑÑ           Nn +--     ÒÓÔÕÖòóôõö   òóôõöòóôõö   ÒÓÔÕÖÒÓÔÕÖ   OOOOOooooo +--     Øø           øø           ØØ           Oo +--     ÙÚÛÜùúûü     ùúûüùúûü     ÙÚÛÜÙÚÛÜ     UUUUuuuu +--     Ýýÿ          ýýÿ          ÝÝŸ          Yyy +--     ß            ß            SS           ss +--     Ţţ           ţţ           ŢŢ           Tt +-- ]] +-- +-- local lower  = characters.lower   print(lower(str)) +-- local upper  = characters.upper   print(upper(str)) +-- local shaped = characters.shaped  print(shaped(str)) +-- +-- local c, n = os.clock(), 10000 +-- for i=1,n do lower(str) upper(str) shaped(str) end -- 2.08 => 0.77 +-- print(os.clock()-c,n*#str*3)  -- maybe: (twice as fast when much ascii)  -- @@ -929,15 +1013,6 @@ end  function characters.uccode(n) return uccodes[n] end -- obsolete  function characters.lccode(n) return lccodes[n] end -- obsolete -function characters.safechar(n) -    local c = data[n] -    if c and c.contextname then -        return "\\" .. c.contextname -    else -        return utfchar(n) -    end -end -  function characters.shape(n)      local shcode = shcodes[n]      if not shcode then @@ -992,36 +1067,36 @@ end  --     groupdata[group] = gdata  -- end ---~ characters.data, characters.groups = chardata, groupdata - ---~  [0xF0000]={ ---~   category="co", ---~   cjkwd="a", ---~   description="<Plane 0x000F Private Use, First>", ---~   direction="l", ---~   unicodeslot=0xF0000, ---~  }, ---~  [0xFFFFD]={ ---~   category="co", ---~   cjkwd="a", ---~   description="<Plane 0x000F Private Use, Last>", ---~   direction="l", ---~   unicodeslot=0xFFFFD, ---~  }, ---~  [0x100000]={ ---~   category="co", ---~   cjkwd="a", ---~   description="<Plane 0x0010 Private Use, First>", ---~   direction="l", ---~   unicodeslot=0x100000, ---~  }, ---~  [0x10FFFD]={ ---~   category="co", ---~   cjkwd="a", ---~   description="<Plane 0x0010 Private Use, Last>", ---~   direction="l", ---~   unicodeslot=0x10FFFD, ---~  }, +-- characters.data, 
characters.groups = chardata, groupdata + +--  [0xF0000]={ +--   category="co", +--   cjkwd="a", +--   description="<Plane 0x000F Private Use, First>", +--   direction="l", +--   unicodeslot=0xF0000, +--  }, +--  [0xFFFFD]={ +--   category="co", +--   cjkwd="a", +--   description="<Plane 0x000F Private Use, Last>", +--   direction="l", +--   unicodeslot=0xFFFFD, +--  }, +--  [0x100000]={ +--   category="co", +--   cjkwd="a", +--   description="<Plane 0x0010 Private Use, First>", +--   direction="l", +--   unicodeslot=0x100000, +--  }, +--  [0x10FFFD]={ +--   category="co", +--   cjkwd="a", +--   description="<Plane 0x0010 Private Use, Last>", +--   direction="l", +--   unicodeslot=0x10FFFD, +--  },  if not characters.superscripts then @@ -1078,259 +1153,6 @@ function characters.showstring(str)      end  end --- the following code will move to char-tex.lua - --- tex - -if not tex or not context or not commands then return characters end - -local tex           = tex -local texsetlccode  = tex.setlccode -local texsetuccode  = tex.setuccode -local texsetsfcode  = tex.setsfcode -local texsetcatcode = tex.setcatcode - -local contextsprint = context.sprint -local ctxcatcodes   = catcodes.numbers.ctxcatcodes - ---[[ldx-- -<p>Instead of using a <l n='tex'/> file to define the named glyphs, we -use the table. After all, we have this information available anyway.</p> ---ldx]]-- - -function commands.makeactive(n,name) -- -    contextsprint(ctxcatcodes,format("\\catcode%s=13\\unexpanded\\def %s{\\%s}",n,utfchar(n),name)) - -- context("\\catcode%s=13\\unexpanded\\def %s{\\%s}",n,utfchar(n),name) -end - -function commands.utfchar(c,n) -    if n then -     -- contextsprint(c,charfromnumber(n)) -        contextsprint(c,utfchar(n)) -    else -     -- contextsprint(charfromnumber(c)) -        contextsprint(utfchar(c)) -    end -end - -function commands.safechar(n) -    local c = data[n] -    if c and c.contextname then -        contextsprint("\\" .. 
c.contextname) -- context[c.contextname]() -    else -        contextsprint(utfchar(n)) -    end -end - -tex.uprint = commands.utfchar - -local forbidden = tohash { -- at least now -    0x00A0, -    0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005, 0x2006, 0x2007, 0x2008, 0x2009, 0x200A, 0x200B, 0x200C, 0x200D, -    0x202F, -    0x205F, - -- 0xFEFF, -} - -function characters.define(tobelettered, tobeactivated) -- catcodetables - -    if trace_defining then -        report_defining("defining active character commands") -    end - -    local activated, a = { }, 0 - -    for u, chr in next, data do -- these will be commands -        local fallback = chr.fallback -        if fallback then -            contextsprint("{\\catcode",u,"=13\\unexpanded\\gdef ",utfchar(u),"{\\checkedchar{",u,"}{",fallback,"}}}") -            a = a + 1 -            activated[a] = u -        else -            local contextname = chr.contextname -            if contextname then -                local category = chr.category -                if is_character[category] then -                    if chr.unicodeslot < 128 then -                        if is_letter[category] then -                            contextsprint(ctxcatcodes,format("\\def\\%s{%s}",contextname,utfchar(u))) -- has no s -                        else -                            contextsprint(ctxcatcodes,format("\\chardef\\%s=%s",contextname,u)) -- has no s -                        end -                    else -                        contextsprint(ctxcatcodes,format("\\def\\%s{%s}",contextname,utfchar(u))) -- has no s -                    end -                elseif is_command[category] and not forbidden[u] then -                    contextsprint("{\\catcode",u,"=13\\unexpanded\\gdef ",utfchar(u),"{\\"..contextname,"}}") -                    a = a + 1 -                    activated[a] = u -                end -            end -        end -    end - -    if tobelettered then -- shared -        local saved = tex.catcodetable -      
  for i=1,#tobelettered do -            tex.catcodetable = tobelettered[i] -            if trace_defining then -                report_defining("defining letters (global, shared)") -            end -            for u, chr in next, data do -                if not chr.fallback and is_letter[chr.category] and u >= 128 and u <= 65536 then -                    texsetcatcode(u,11) -                end -                local range = chr.range -                if range then -                    for i=1,range.first,range.last do -- tricky as not all are letters -                        texsetcatcode(i,11) -                    end -                end -            end -            texsetcatcode(0x200C,11) -- non-joiner -            texsetcatcode(0x200D,11) -- joiner -            for k, v in next, blocks do -                if v.catcode == "letter" then -                    for i=v.first,v.last do -                        texsetcatcode(i,11) -                    end -                end -            end -        end -        tex.catcodetable = saved -    end - -    local nofactivated = #tobeactivated -    if tobeactivated and nofactivated > 0 then -        for i=1,nofactivated do -            local u = activated[i] -            if u then -                report_defining("character %U is active in set %a, containing %a",u,data[u].description,tobeactivated) -            end -        end -        local saved = tex.catcodetable -        for i=1,#tobeactivated do -            local vector = tobeactivated[i] -            if trace_defining then -                report_defining("defining %a active characters in vector %a",nofactivated,vector) -            end -            tex.catcodetable = vector -            for i=1,nofactivated do -                local u = activated[i] -                if u then -                    texsetcatcode(u,13) -                end -            end -        end -        tex.catcodetable = saved -    end - -end - ---[[ldx-- -<p>Setting the lccodes is also 
done in a loop over the data table.</p> ---ldx]]-- - -local sfmode = "unset" -- unset, traditional, normal - -function characters.setcodes() -    if trace_defining then -        report_defining("defining lc and uc codes") -    end -    local traditional = sfstate == "traditional" or sfstate == "unset" -    for code, chr in next, data do -        local cc = chr.category -        if is_letter[cc] then -            local range = chr.range -            if range then -                for i=range.first,range.last do -                    texsetcatcode(i,11) -- letter -                    texsetlccode(i,i,i) -- self self -                end -            else -                local lc, uc = chr.lccode, chr.uccode -                if not lc then -                    chr.lccode, lc = code, code -                elseif type(lc) == "table" then -                    lc = code -                end -                if not uc then -                    chr.uccode, uc = code, code -                elseif type(uc) == "table" then -                    uc = code -                end -                texsetcatcode(code,11)   -- letter -                texsetlccode(code,lc,uc) -                if traditional and cc == "lu" then -                    texsetsfcode(code,999) -                end -            end -        elseif is_mark[cc] then -            texsetlccode(code,code,code) -- for hyphenation -        end -    end -    if traditional then -        sfstate = "traditional" -    end -end - --- If this is something that is not documentwide and used a lot, then we --- need a more clever approach (trivial but not now). 
- -local function setuppersfcodes(v,n) -    if sfstate ~= "unset" then -        report_defining("setting uppercase sf codes to %a",n) -        for code, chr in next, data do -            if chr.category == "lu" then -                texsetsfcode(code,n) -            end -        end -    end -    sfstate = v -end - -directives.register("characters.spaceafteruppercase",function(v) -    if v == "traditional" then -        setuppersfcodes(v,999) -    elseif v == "normal" then -        setuppersfcodes(v,1000) -    end -end) - --- tex - -function commands.chardescription(slot) -    local d = data[slot] -    if d then -        context(d.description) -    end -end - --- xml - -characters.activeoffset = 0x10000 -- there will be remapped in that byte range - -function commands.remapentity(chr,slot) -    contextsprint(format("{\\catcode%s=13\\xdef%s{\\string%s}}",slot,utfchar(slot),chr)) -end - --- xml.entities = xml.entities or { } --- --- storage.register("xml/entities",xml.entities,"xml.entities") -- this will move to lxml --- --- function characters.setmkiventities() ---     local entities = xml.entities ---     entities.lt  = "<" ---     entities.amp = "&" ---     entities.gt  = ">" --- end --- --- function characters.setmkiientities() ---     local entities = xml.entities ---     entities.lt  = utfchar(characters.activeoffset + utfbyte("<")) ---     entities.amp = utfchar(characters.activeoffset + utfbyte("&")) ---     entities.gt  = utfchar(characters.activeoffset + utfbyte(">")) --- end +-- code moved to char-tex.lua -commands.definecatcodetable = characters.define -commands.setcharactercodes  = characters.setcodes +return characters diff --git a/tex/context/base/char-ini.mkiv b/tex/context/base/char-ini.mkiv index db52ae723..4fb63d93e 100644 --- a/tex/context/base/char-ini.mkiv +++ b/tex/context/base/char-ini.mkiv @@ -13,9 +13,7 @@  \writestatus{loading}{ConTeXt Character Support / Initialization} -\registerctxluafile{char-def}{1.001} % let's load this one first 
-\registerctxluafile{char-ini}{1.001} -\registerctxluafile{char-cjk}{1.001} +\registerctxluafile{char-fio}{1.001}  \registerctxluafile{char-map}{1.001} % maybe we will load this someplace else  \registerctxluafile{char-tex}{1.001} diff --git a/tex/context/base/char-tex.lua b/tex/context/base/char-tex.lua index 472cae930..a9a760c7a 100644 --- a/tex/context/base/char-tex.lua +++ b/tex/context/base/char-tex.lua @@ -7,16 +7,130 @@ if not modules then modules = { } end modules ['char-tex'] = {  }  local lpeg = lpeg +local context = context +local commands = commands -local find = string.find +local next, type = next, type +local format, find, gmatch = string.format, string.find, string.gmatch +local utfchar, utfbyte = utf.char, utf.byte +local concat, tohash = table.concat, table.tohash  local P, C, R, S, V, Cs, Cc = lpeg.P, lpeg.C, lpeg.R, lpeg.S, lpeg.V, lpeg.Cs, lpeg.Cc -local U, lpegmatch = lpeg.patterns.utf8, lpeg.match -local allocate, mark = utilities.storage.allocate, utilities.storage.mark +local lpegpatterns          = lpeg.patterns +local lpegmatch             = lpeg.match +local utf8byte              = lpegpatterns.utf8byte +local utf8char              = lpegpatterns.utf8char +local utfchartabletopattern = lpeg.utfchartabletopattern -characters       = characters or { } -local characters = characters -characters.tex   = characters.tex or { } +local allocate              = utilities.storage.allocate +local mark                  = utilities.storage.mark + +local characters            = characters +local texcharacters         = { } +characters.tex              = texcharacters +local utffilters            = characters.filters.utf + +local is_character          = characters.is_character +local is_letter             = characters.is_letter +local is_command            = characters.is_command +local is_spacing            = characters.is_spacing +local is_mark               = characters.is_mark +local is_punctuation        = characters.is_punctuation + +local data    
              = characters.data  if not data then return end +local blocks                = characters.blocks + +local trace_defining        = false  trackers.register("characters.defining", function(v) characters_defining = v end) + +local report_defining       = logs.reporter("characters") + + + + + + + + + + + + + + +--[[ldx-- +<p>In order to deal with 8-bit output, we need to find a way to go from <l n='utf'/> to +8-bit. This is handled in the <l n='luatex'/> engine itself.</p> + +<p>This leaves us problems with characters that are specific to <l n='tex'/> like +<type>{}</type>, <type>$</type> and alike. We can remap some chars that tex input files +are sensitive for to a private area (while writing to a utility file) and revert then +to their original slot when we read in such a file. Instead of reverting, we can (when +we resolve characters to glyphs) map them to their right glyph there. For this purpose +we can use the private planes 0x0F0000 and 0x100000.</p> +--ldx]]-- + +local low     = allocate() +local high    = allocate() +local escapes = allocate() +local special = "~#$%^&_{}\\|" -- "~#$%{}\\|" + +local private = { +    low     = low, +    high    = high, +    escapes = escapes, +} + +utffilters.private = private + +for ch in gmatch(special,".") do +    local cb +    if type(ch) == "number" then +        cb, ch = ch, utfchar(ch) +    else +        cb = utfbyte(ch) +    end +    if cb < 256 then +        escapes[ch] = "\\" .. 
ch +        low[ch] = utfchar(0x0F0000 + cb) +        if ch == "%" then +            ch = "%%" -- nasty, but we need this as in replacements (also in lpeg) % is interpreted +        end +        high[utfchar(0x0F0000 + cb)] = ch +    end +end + +local tohigh = lpeg.replacer(low)   -- frozen, only for basic tex +local tolow  = lpeg.replacer(high)  -- frozen, only for basic tex + +lpegpatterns.utftohigh = tohigh +lpegpatterns.utftolow  = tolow + +function utffilters.harden(str) +    return lpegmatch(tohigh,str) +end + +function utffilters.soften(str) +    return lpegmatch(tolow,str) +end + +private.escape  = utf.remapper(escapes) +private.replace = utf.remapper(low) +private.revert  = utf.remapper(high) + +--[[ldx-- +<p>We get a more efficient variant of this when we integrate +replacements in collapser. This more or less renders the previous +private code redundant. The following code is equivalent but the +first snippet uses the relocated dollars.</p> + +<typing> +[x] [$x$] +</typing> +--ldx]]-- + +-- using the tree-lpeg-mapper would be nice but we also need to deal with end-of-string +-- cases: "\"\i" and don't want "\relax" to be seen as \r e lax" (for which we need to mess +-- with spaces  local accentmapping = allocate {      ['"'] = { [""] = "¨", @@ -128,7 +242,7 @@ local accentmapping = allocate {      },  } -characters.tex.accentmapping = accentmapping +texcharacters.accentmapping = accentmapping  local accent_map = allocate { -- incomplete     ['~'] = "̃" , --  ̃ Ẽ @@ -150,7 +264,7 @@ local accent_map = allocate { -- incomplete      --  ̰ Ḛ  } --- local accents = table.concat(table.keys(accentmapping)) -- was _map +-- local accents = concat(table.keys(accentmapping)) -- was _map  local function remap_accent(a,c,braced)      local m = accentmapping[a] @@ -171,7 +285,7 @@ local function remap_accent(a,c,braced)      end  end -local command_map = allocate { +local commandmapping = allocate {      ["i"]  = "ı",      ["l"]  = "ł",      ["ss"] = "ß", @@ -185,68 
+299,125 @@ local command_map = allocate {      ["AA"] = "Å",  } --- no need for U here - -local achar    = R("az","AZ") + P("ı") + P("\\i") +texcharacters.commandmapping = commandmapping -local spaces   = P(" ")^0 -local no_l     = P("{") / "" -local no_r     = P("}") / "" -local no_b     = P('\\') / "" +-- local achar    = R("az","AZ") + P("ı") + P("\\i") +-- +-- local spaces   = P(" ")^0 +-- local no_l     = P("{") / "" +-- local no_r     = P("}") / "" +-- local no_b     = P('\\') / "" +-- +-- local lUr      = P("{") * C(achar) * P("}") +-- +-- local accents_1 = [["'.=^`~]] +-- local accents_2 = [[Hckruv]] +-- +-- local accent   = P('\\') * ( +--     C(S(accents_1)) * (lUr * Cc(true) + C(achar) * Cc(false)) + -- we need achar for ı etc, could be sped up +--     C(S(accents_2)) *  lUr * Cc(true) +-- ) / remap_accent +-- +-- local csname  = P('\\') * C(R("az","AZ")^1) +-- +-- local command  = ( +--     csname + +--     P("{") * csname * spaces * P("}") +-- ) / commandmapping -- remap_commands +-- +-- local both_1 = Cs { "run", +--     accent  = accent, +--     command = command, +--     run     = (V("accent") + no_l * V("accent") * no_r + V("command") + P(1))^0, +-- } +-- +-- local both_2 = Cs { "run", +--     accent  = accent, +--     command = command, +--     run     = (V("accent") + V("command") + no_l * ( V("accent") + V("command") ) * no_r + P(1))^0, +-- } +-- +-- function texcharacters.toutf(str,strip) +--     if not find(str,"\\") then +--         return str +--     elseif strip then +--         return lpegmatch(both_1,str) +--     else +--         return lpegmatch(both_2,str) +--     end +-- end -local lUr      = P("{") * C(achar) * P("}") +local untex -local accents_1 = [["'.=^`~]] -local accents_2 = [[Hckruv]] +local function toutfpattern() +    if not untex then +        local hash = { } +        for k, v in next, accentmapping do +            for kk, vv in next, v do +                if (k >= "a" and k <= "z") or (k >= "A" and k <= "Z") then +         
           hash[ "\\"..k.." "..kk     ] = vv +                    hash["{\\"..k.." "..kk.."}"] = vv +                else +                    hash["\\" ..k     ..kk     ] = vv +                    hash["{\\"..k     ..kk.."}"] = vv +                end +                hash["\\" ..k.."{"..kk.."}" ] = vv +                hash["{\\"..k.."{"..kk.."}}"] = vv +            end +        end +        for k, v in next, commandmapping do +            hash["\\"..k.." "] = v +            hash["{\\"..k.."}"] = v +            hash["{\\"..k.." }"] = v +        end +        untex = utfchartabletopattern(hash) / hash +    end +    return untex +end -local accent   = P('\\') * ( -    C(S(accents_1)) * (lUr * Cc(true) + C(achar) * Cc(false)) + -- we need achar for ı etc, could be sped up -    C(S(accents_2)) *  lUr * Cc(true) -) / remap_accent +texcharacters.toutfpattern = toutfpattern -local csname  = P('\\') * C(R("az","AZ")^1) +local pattern = nil -local command  = ( -    csname + -    P("{") * csname * spaces * P("}") -) / command_map -- remap_commands +local function prepare() +    pattern = Cs((toutfpattern() + P(1))^0) +    return pattern +end -local both_1 = Cs { "run", -    accent  = accent, -    command = command, -    run     = (V("accent") + no_l * V("accent") * no_r + V("command") + P(1))^0, -} +function texcharacters.toutf(str,strip) +    if str == "" then +        return str +    elseif not find(str,"\\") then +        return str + -- elseif strip then +    else +        return lpegmatch(pattern or prepare(),str) +    end +end -local both_2 = Cs { "run", -    accent  = accent, -    command = command, -    run     = (V("accent") + V("command") + no_l * ( V("accent") + V("command") ) * no_r + P(1))^0, -} +-- print(texcharacters.toutf([[\~{Z}]],true)) +-- print(texcharacters.toutf([[\'\i]],true)) +-- print(texcharacters.toutf([[\'{\i}]],true)) +-- print(texcharacters.toutf([[\"{e}]],true)) +-- print(texcharacters.toutf([[\" {e}]],true)) +-- 
print(texcharacters.toutf([[{\"{e}}]],true)) +-- print(texcharacters.toutf([[{\" {e}}]],true)) +-- print(texcharacters.toutf([[{\l}]],true)) +-- print(texcharacters.toutf([[{\l }]],true)) +-- print(texcharacters.toutf([[\v{r}]],true)) +-- print(texcharacters.toutf([[fo{\"o}{\ss}ar]],true)) +-- print(texcharacters.toutf([[H{\'a}n Th\^e\llap{\raise 0.5ex\hbox{\'{\relax}}} Th{\'a}nh]],true)) -function characters.tex.toutf(str,strip) -    if not find(str,"\\") then -        return str -    elseif strip then -        return lpegmatch(both_1,str) +function texcharacters.safechar(n) -- was characters.safechar +    local c = data[n] +    if c and c.contextname then +        return "\\" .. c.contextname      else -        return lpegmatch(both_2,str) +        return utfchar(n)      end  end --- print(characters.tex.toutf([[\~{Z}]],true)) --- print(characters.tex.toutf([[\'\i]],true)) --- print(characters.tex.toutf([[\'{\i}]],true)) --- print(characters.tex.toutf([[\"{e}]],true)) --- print(characters.tex.toutf([[\" {e}]],true)) --- print(characters.tex.toutf([[{\"{e}}]],true)) --- print(characters.tex.toutf([[{\" {e}}]],true)) --- print(characters.tex.toutf([[{\l}]],true)) --- print(characters.tex.toutf([[{\l }]],true)) --- print(characters.tex.toutf([[\v{r}]],true)) --- print(characters.tex.toutf([[fo{\"o}{\ss}ar]],true)) --- print(characters.tex.toutf([[H{\'a}n Th\^e\llap{\raise 0.5ex\hbox{\'{\relax}}} Th{\'a}nh]],true)) - -function characters.tex.defineaccents() +function texcharacters.defineaccents()      for accent, group in next, accentmapping do          context.dodefineaccentcommand(accent)          for character, mapping in next, group do @@ -254,3 +425,256 @@ function characters.tex.defineaccents()          end      end  end + +-- all kind of initializations + +local tex           = tex +local texsetlccode  = tex.setlccode +local texsetuccode  = tex.setuccode +local texsetsfcode  = tex.setsfcode +local texsetcatcode = tex.setcatcode + +local contextsprint = 
context.sprint +local ctxcatcodes   = catcodes.numbers.ctxcatcodes + +--[[ldx-- +<p>Instead of using a <l n='tex'/> file to define the named glyphs, we +use the table. After all, we have this information available anyway.</p> +--ldx]]-- + +function commands.makeactive(n,name) -- +    contextsprint(ctxcatcodes,format("\\catcode%s=13\\unexpanded\\def %s{\\%s}",n,utfchar(n),name)) + -- context("\\catcode%s=13\\unexpanded\\def %s{\\%s}",n,utfchar(n),name) +end + +function commands.utfchar(c,n) +    if n then +     -- contextsprint(c,charfromnumber(n)) +        contextsprint(c,utfchar(n)) +    else +     -- contextsprint(charfromnumber(c)) +        contextsprint(utfchar(c)) +    end +end + +function commands.safechar(n) +    local c = data[n] +    if c and c.contextname then +        contextsprint("\\" .. c.contextname) -- context[c.contextname]() +    else +        contextsprint(utfchar(n)) +    end +end + +tex.uprint = commands.utfchar + +local forbidden = tohash { -- at least now +    0x00A0, +    0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005, 0x2006, 0x2007, 0x2008, 0x2009, 0x200A, 0x200B, 0x200C, 0x200D, +    0x202F, +    0x205F, + -- 0xFEFF, +} + +function characters.define(tobelettered, tobeactivated) -- catcodetables + +    if trace_defining then +        report_defining("defining active character commands") +    end + +    local activated, a = { }, 0 + +    for u, chr in next, data do -- these will be commands +        local fallback = chr.fallback +        if fallback then +            contextsprint("{\\catcode",u,"=13\\unexpanded\\gdef ",utfchar(u),"{\\checkedchar{",u,"}{",fallback,"}}}") +            a = a + 1 +            activated[a] = u +        else +            local contextname = chr.contextname +            if contextname then +                local category = chr.category +                if is_character[category] then +                    if chr.unicodeslot < 128 then +                        if is_letter[category] then +                            
contextsprint(ctxcatcodes,format("\\def\\%s{%s}",contextname,utfchar(u))) -- has no s +                        else +                            contextsprint(ctxcatcodes,format("\\chardef\\%s=%s",contextname,u)) -- has no s +                        end +                    else +                        contextsprint(ctxcatcodes,format("\\def\\%s{%s}",contextname,utfchar(u))) -- has no s +                    end +                elseif is_command[category] and not forbidden[u] then +                    contextsprint("{\\catcode",u,"=13\\unexpanded\\gdef ",utfchar(u),"{\\"..contextname,"}}") +                    a = a + 1 +                    activated[a] = u +                end +            end +        end +    end + +    if tobelettered then -- shared +        local saved = tex.catcodetable +        for i=1,#tobelettered do +            tex.catcodetable = tobelettered[i] +            if trace_defining then +                report_defining("defining letters (global, shared)") +            end +            for u, chr in next, data do +                if not chr.fallback and is_letter[chr.category] and u >= 128 and u <= 65536 then +                    texsetcatcode(u,11) +                end +                local range = chr.range +                if range then +                    for i=range.first,range.last do -- tricky as not all are letters +                        texsetcatcode(i,11) +                    end +                end +            end +            texsetcatcode(0x200C,11) -- non-joiner +            texsetcatcode(0x200D,11) -- joiner +            for k, v in next, blocks do +                if v.catcode == "letter" then +                    for i=v.first,v.last do +                        texsetcatcode(i,11) +                    end +                end +            end +        end +        tex.catcodetable = saved +    end + +    local nofactivated = tobeactivated and #tobeactivated or 0 +    if tobeactivated and nofactivated > 0 then +        for i=1,nofactivated do 
+            local u = activated[i] +            if u then +                report_defining("character %U is active in set %a, containing %a",u,data[u].description,tobeactivated) +            end +        end +        local saved = tex.catcodetable +        for i=1,#tobeactivated do +            local vector = tobeactivated[i] +            if trace_defining then +                report_defining("defining %a active characters in vector %a",nofactivated,vector) +            end +            tex.catcodetable = vector +            for i=1,nofactivated do +                local u = activated[i] +                if u then +                    texsetcatcode(u,13) +                end +            end +        end +        tex.catcodetable = saved +    end + +end + +--[[ldx-- +<p>Setting the lccodes is also done in a loop over the data table.</p> +--ldx]]-- + +local sfstate = "unset" -- unset, traditional, normal + +function characters.setcodes() +    if trace_defining then +        report_defining("defining lc and uc codes") +    end +    local traditional = sfstate == "traditional" or sfstate == "unset" +    for code, chr in next, data do +        local cc = chr.category +        if is_letter[cc] then +            local range = chr.range +            if range then +                for i=range.first,range.last do +                    texsetcatcode(i,11) -- letter +                    texsetlccode(i,i,i) -- self self +                end +            else +                local lc, uc = chr.lccode, chr.uccode +                if not lc then +                    chr.lccode, lc = code, code +                elseif type(lc) == "table" then +                    lc = code +                end +                if not uc then +                    chr.uccode, uc = code, code +                elseif type(uc) == "table" then +                    uc = code +                end +                texsetcatcode(code,11)   -- letter +                texsetlccode(code,lc,uc) +               
 if traditional and cc == "lu" then +                    texsetsfcode(code,999) +                end +            end +        elseif is_mark[cc] then +            texsetlccode(code,code,code) -- for hyphenation +        end +    end +    if traditional then +        sfstate = "traditional" +    end +end + +-- If this is something that is not documentwide and used a lot, then we +-- need a more clever approach (trivial but not now). + +local function setuppersfcodes(v,n) +    if sfstate ~= "unset" then +        report_defining("setting uppercase sf codes to %a",n) +        for code, chr in next, data do +            if chr.category == "lu" then +                texsetsfcode(code,n) +            end +        end +    end +    sfstate = v +end + +directives.register("characters.spaceafteruppercase",function(v) +    if v == "traditional" then +        setuppersfcodes(v,999) +    elseif v == "normal" then +        setuppersfcodes(v,1000) +    end +end) + +-- tex + +function commands.chardescription(slot) +    local d = data[slot] +    if d then +        context(d.description) +    end +end + +-- xml + +characters.activeoffset = 0x10000 -- these will be remapped in that byte range + +function commands.remapentity(chr,slot) +    contextsprint(format("{\\catcode%s=13\\xdef%s{\\string%s}}",slot,utfchar(slot),chr)) +end + +-- xml.entities = xml.entities or { } +-- +-- storage.register("xml/entities",xml.entities,"xml.entities") -- this will move to lxml +-- +-- function characters.setmkiventities() +--     local entities = xml.entities +--     entities.lt  = "<" +--     entities.amp = "&" +--     entities.gt  = ">" +-- end +-- +-- function characters.setmkiientities() +--     local entities = xml.entities +--     entities.lt  = utfchar(characters.activeoffset + utfbyte("<")) +--     entities.amp = utfchar(characters.activeoffset + utfbyte("&")) +--     entities.gt  = utfchar(characters.activeoffset + utfbyte(">")) +-- end + +commands.definecatcodetable = characters.define 
+commands.setcharactercodes  = characters.setcodes diff --git a/tex/context/base/char-utf.lua b/tex/context/base/char-utf.lua index 98a780dcd..fcd300f6b 100644 --- a/tex/context/base/char-utf.lua +++ b/tex/context/base/char-utf.lua @@ -6,11 +6,6 @@ if not modules then modules = { } end modules ['char-utf'] = {      license   = "see context related readme files"  } --- todo: trackers --- todo: no longer special characters (high) here, only needed in special cases and --- these don't go through this file anyway --- graphemes: basic symbols -  --[[ldx--  <p>When a sequence of <l n='utf'/> characters enters the application, it may be  neccessary to collapse subsequences into their composed variant.</p> @@ -24,44 +19,46 @@ of output (for instance <l n='pdf'/>).</p>  over a string.</p>  --ldx]]-- -local gmatch, gsub, find = string.gmatch, string.gsub, string.find +local gsub, find = string.gsub, string.find  local concat, sortedhash, keys, sort = table.concat, table.sortedhash, table.keys, table.sort  local utfchar, utfbyte, utfcharacters, utfvalues = utf.char, utf.byte, utf.characters, utf.values -local allocate = utilities.storage.allocate -local lpegmatch, lpegpatterns, P, Cs, Cmt, Ct = lpeg.match, lpeg.patterns, lpeg.P, lpeg.Cs, lpeg.Cmt, lpeg.Ct +local P, Cs, Cmt, Ct = lpeg.P, lpeg.Cs, lpeg.Cmt, lpeg.Ct + +if not characters        then require("char-def") end +if not characters.blocks then require("char-ini") end +local lpegmatch             = lpeg.match +local lpegpatterns          = lpeg.patterns  local p_utf8character       = lpegpatterns.utf8character  local utfchartabletopattern = lpeg.utfchartabletopattern -if not characters then -    require("char-def") -end +local allocate              = utilities.storage.allocate or function() return { } end -local charfromnumber   = characters.fromnumber +local charfromnumber        = characters.fromnumber -characters             = characters or { } -local characters       = characters +characters                  = 
characters or { } +local characters            = characters -local graphemes        = allocate() -characters.graphemes   = graphemes +local graphemes             = allocate() +characters.graphemes        = graphemes -local collapsed        = allocate() -characters.collapsed   = collapsed +local collapsed             = allocate() +characters.collapsed        = collapsed -local combined         = allocate() -characters.combined    = combined +local combined              = allocate() +characters.combined         = combined -local decomposed       = allocate() -characters.decomposed  = decomposed +local decomposed            = allocate() +characters.decomposed       = decomposed -local mathpairs        = allocate() -characters.mathpairs   = mathpairs +local mathpairs             = allocate() +characters.mathpairs        = mathpairs -local filters          = allocate() -characters.filters     = filters +local filters               = allocate() +characters.filters          = filters -local utffilters       = { } -characters.filters.utf = utffilters +local utffilters            = { } +characters.filters.utf      = utffilters  -- is characters.combined cached? @@ -221,92 +218,28 @@ end  characters.initialize = initialize  --[[ldx-- -<p>In order to deal with 8-bit output, we need to find a way to go from <l n='utf'/> to -8-bit. This is handled in the <l n='luatex'/> engine itself.</p> - -<p>This leaves us problems with characters that are specific to <l n='tex'/> like -<type>{}</type>, <type>$</type> and alike. We can remap some chars that tex input files -are sensitive for to a private area (while writing to a utility file) and revert then -to their original slot when we read in such a file. Instead of reverting, we can (when -we resolve characters to glyphs) map them to their right glyph there. 
For this purpose -we can use the private planes 0x0F0000 and 0x100000.</p> ---ldx]]-- - -local low     = allocate() -local high    = allocate() -local escapes = allocate() -local special = "~#$%^&_{}\\|" -- "~#$%{}\\|" - -local private = { -    low     = low, -    high    = high, -    escapes = escapes, -} - -utffilters.private = private - -local tohigh = lpeg.replacer(low)   -- frozen, only for basic tex -local tolow  = lpeg.replacer(high)  -- frozen, only for basic tex - -lpegpatterns.utftohigh = tohigh -lpegpatterns.utftolow  = tolow - -function utffilters.harden(str) -    return lpegmatch(tohigh,str) -end - -function utffilters.soften(str) -    return lpegmatch(tolow,str) -end - -local function set(ch) -    local cb -    if type(ch) == "number" then -        cb, ch = ch, utfchar(ch) -    else -        cb = utfbyte(ch) -    end -    if cb < 256 then -        escapes[ch] = "\\" .. ch -        low[ch] = utfchar(0x0F0000 + cb) -        if ch == "%" then -            ch = "%%" -- nasty, but we need this as in replacements (also in lpeg) % is interpreted -        end -        high[utfchar(0x0F0000 + cb)] = ch -    end -end - -private.set = set - --- function private.escape (str) return    gsub(str,"(.)", escapes) end --- function private.replace(str) return utfgsub(str,"(.)", low    ) end --- function private.revert (str) return utfgsub(str,"(.)", high   ) end - -private.escape  = utf.remapper(escapes) -private.replace = utf.remapper(low) -private.revert  = utf.remapper(high) - -for ch in gmatch(special,".") do set(ch) end - ---[[ldx-- -<p>We get a more efficient variant of this when we integrate -replacements in collapser. This more or less renders the previous -private code redundant. 
The following code is equivalent but the -first snippet uses the relocated dollars.</p> - -<typing> -[x] [$x$] -</typing> -  <p>The next variant has lazy token collecting, on a 140 page mk.tex this saves  about .25 seconds, which is understandable because we have no graphemes and  not collecting tokens is not only faster but also saves garbage collecting.  </p>  --ldx]]-- -local skippable  = table.tohash { "mkiv", "mkvi", "mkix", "mkxi" } +local skippable  = { }  local filesuffix = file.suffix +function utffilters.setskippable(suffix,value) +    if value == nil then +        value = true +    end +    if type(suffix) == "table" then +        for i=1,#suffix do +            skippable[suffix[i]] = value +        end +    else +        skippable[suffix] = value +    end +end +  -- function utffilters.collapse(str,filename)   -- we can make high a seperate pass (never needed with collapse)  --     if skippable[filesuffix(filename)] then  --         return str @@ -406,7 +339,7 @@ local filesuffix = file.suffix  --                 return concat(tokens) -- seldom called  --             end  --         elseif nstr > 0 then ---             return high[str] or str -- thsi will go from here +--             return high[str] or str -- this will go from here  --         end  --     end  --     return str @@ -420,7 +353,7 @@ local function prepare()      if initialize then          initialize()      end -    local tree = utfchartabletopattern(keys(collapsed)) +    local tree = utfchartabletopattern(collapsed)      p_collapse = Cs((tree/collapsed + p_utf8character)^0 * P(-1)) -- the P(1) is needed in order to accept non utf  end @@ -487,7 +420,7 @@ end  --         if initialize then  --             initialize()  --         end ---         local tree = utfchartabletopattern(keys(decomposed)) +--         local tree = utfchartabletopattern(decomposed)  --         finder   = lpeg.finder(tree,false,true)  --         replacer = lpeg.replacer(tree,decomposed,false,true)  --     end @@ 
-503,11 +436,11 @@ local function prepare()      if initialize then          initialize()      end -    local tree = utfchartabletopattern(keys(decomposed)) +    local tree = utfchartabletopattern(decomposed)      p_decompose = Cs((tree/decomposed + p_utf8character)^0 * P(-1))  end -function utffilters.decompose(str) -- 3 to 4 times faster than the above +function utffilters.decompose(str,filename) -- 3 to 4 times faster than the above      if not p_decompose then          prepare()      end @@ -619,12 +552,12 @@ local function prepare()              hash[utfchar(k)] = { utfchar(k), combining, 0 } -- slot 3 can be used in sort          end      end -    local e = utfchartabletopattern(keys(exceptions)) -    local p = utfchartabletopattern(keys(hash)) +    local e = utfchartabletopattern(exceptions) +    local p = utfchartabletopattern(hash)      p_reorder = Cs((e/exceptions + Cmt(Ct((p/hash)^2),swapper) + p_utf8character)^0) * P(-1)  end -function utffilters.reorder(str) +function utffilters.reorder(str,filename)      if not p_reorder then          prepare()      end @@ -638,141 +571,6 @@ function utffilters.reorder(str)      return str  end --- -- - -local sequencers = utilities.sequencers - -if sequencers then - -    local textfileactions = resolvers.openers.helpers.textfileactions -    local textlineactions = resolvers.openers.helpers.textlineactions - -    sequencers.appendaction (textfileactions,"system","characters.filters.utf.reorder") -    sequencers.disableaction(textfileactions,"characters.filters.utf.reorder") - -    sequencers.appendaction (textlineactions,"system","characters.filters.utf.reorder") -    sequencers.disableaction(textlineactions,"characters.filters.utf.reorder") - -    sequencers.appendaction (textfileactions,"system","characters.filters.utf.collapse") -    sequencers.disableaction(textfileactions,"characters.filters.utf.collapse") - -    sequencers.appendaction (textfileactions,"system","characters.filters.utf.decompose") -    
sequencers.disableaction(textfileactions,"characters.filters.utf.decompose") - -    function characters.filters.utf.enable() -        sequencers.enableaction(textfileactions,"characters.filters.utf.reorder") -        sequencers.enableaction(textfileactions,"characters.filters.utf.collapse") -        sequencers.enableaction(textfileactions,"characters.filters.utf.decompose") -    end - -    local function configure(what,v) -        if not v then -            sequencers.disableaction(textfileactions,what) -            sequencers.disableaction(textlineactions,what) -        elseif v == "line" then -            sequencers.disableaction(textfileactions,what) -            sequencers.enableaction (textlineactions,what) -        else -- true or text -            sequencers.enableaction (textfileactions,what) -            sequencers.disableaction(textlineactions,what) -        end -    end - -    directives.register("filters.utf.reorder", function(v) -        configure("characters.filters.utf.reorder",v) -    end) - -    directives.register("filters.utf.collapse", function(v) -        configure("characters.filters.utf.collapse",v) -    end) - -    directives.register("filters.utf.decompose", function(v) -        configure("characters.filters.utf.decompose",v) -    end) - -end - --- Faster when we deal with lots of data but somewhat complicated by the fact that we want to be --- downward compatible .. so maybe some day I'll simplify it. We seldom have large quantities of --- text. 
- --- local p_processed = nil -- so we can reset if needed --- --- function utffilters.preprocess(str,filename) ---     if not p_processed then ---         if initialize then ---             initialize() ---         end ---         local merged = table.merged(collapsed,decomposed) ---         local tree   = utfchartabletopattern(keys(merged)) ---         p_processed  = Cs((tree/merged     + lpegpatterns.utf8char)^0 * P(-1)) -- the P(1) is needed in order to accept non utf ---         local tree   = utfchartabletopattern(keys(collapsed)) ---         p_collapse   = Cs((tree/collapsed  + lpegpatterns.utf8char)^0 * P(-1)) -- the P(1) is needed in order to accept non utf ---         local tree   = utfchartabletopattern(keys(decomposed)) ---         p_decompose  = Cs((tree/decomposed + lpegpatterns.utf8char)^0 * P(-1)) -- the P(1) is needed in order to accept non utf ---     end ---     if not str or #str == "" or #str == 1 then ---         return str ---     elseif filename and skippable[filesuffix(filename)] then -- we could hash the collapsables or do a quicker test ---         return str ---     else ---         return lpegmatch(p_processed,str) or str ---     end --- end --- --- local sequencers = utilities.sequencers --- --- if sequencers then --- ---     local textfileactions = resolvers.openers.helpers.textfileactions --- ---     local collapse, decompose = false, false --- ---     sequencers.appendaction (textfileactions,"system","characters.filters.utf.preprocess") ---     sequencers.disableaction(textfileactions,"characters.filters.utf.preprocess") --- ---     local function checkable() ---         if decompose then ---             if collapse then ---                 sequencers.disableaction(textfileactions,"characters.filters.utf.collapse") ---                 sequencers.disableaction(textfileactions,"characters.filters.utf.decompose") ---                 sequencers.enableaction (textfileactions,"characters.filters.utf.preprocess") ---             else ---   
              sequencers.disableaction(textfileactions,"characters.filters.utf.collapse") ---                 sequencers.enableaction (textfileactions,"characters.filters.utf.decompose") ---                 sequencers.disableaction(textfileactions,"characters.filters.utf.preprocess") ---             end ---         else ---             if collapse then ---                 sequencers.disableaction(textfileactions,"characters.filters.utf.collapse") ---                 sequencers.disableaction(textfileactions,"characters.filters.utf.decompose") ---                 sequencers.disableaction(textfileactions,"characters.filters.utf.preprocess") ---             else ---                 sequencers.disableaction(textfileactions,"characters.filters.utf.collapse") ---                 sequencers.disableaction(textfileactions,"characters.filters.utf.decompose") ---                 sequencers.disableaction(textfileactions,"characters.filters.utf.preprocess") ---             end ---         end ---     end --- ---     function characters.filters.utf.enable() ---         collapse  = true ---         decompose = true ---         checkable() ---     end --- ---     directives.register("filters.utf.collapse", function(v) ---         collapse = v ---         checkable() ---     end) --- ---     directives.register("filters.utf.decompose", function(v) ---         decompose = v ---         checkable() ---     end) --- --- end -  -- local collapse   = utffilters.collapse  -- local decompose  = utffilters.decompose  -- local preprocess = utffilters.preprocess @@ -815,3 +613,5 @@ end  -- local done = utffilters.reorder(test)  --  -- print(test,done,test==done,false) + +return characters diff --git a/tex/context/base/char-utf.mkiv b/tex/context/base/char-utf.mkiv index 280e7ef6d..381360905 100644 --- a/tex/context/base/char-utf.mkiv +++ b/tex/context/base/char-utf.mkiv @@ -22,22 +22,15 @@  \unprotect +\registerctxluafile{char-def}{1.001} +\registerctxluafile{char-ini}{1.001}  
\registerctxluafile{char-utf}{1.001} +\registerctxluafile{char-cjk}{1.001}  %D We enable collapsing (combining characters) by default, but  %D since the source files are rather simple, we postpone the  %D initialization till runtime. -% resolvers.filters.install('utf',characters.filters.utf.collapse) - -% \appendtoks -%     \ctxlua{ -%         local textfileactions = resolvers.openers.helpers.textfileactions -%         utilities.sequencers.enableaction(textfileactions,"characters.filters.utf.collapse") -%         utilities.sequencers.enableaction(textfileactions,"characters.filters.utf.decompose") -%     }% -% \to \everyjob -  \appendtoks      \ctxlua{characters.filters.utf.enable()}%  \to \everyjob diff --git a/tex/context/base/cont-new.mkiv b/tex/context/base/cont-new.mkiv index 22bda98b0..f0977ee79 100644 --- a/tex/context/base/cont-new.mkiv +++ b/tex/context/base/cont-new.mkiv @@ -11,7 +11,7 @@  %C therefore copyrighted by \PRAGMA. See mreadme.pdf for  %C details. -\newcontextversion{2014.07.04 15:55} +\newcontextversion{2014.07.06 22:50}  %D This file is loaded at runtime, thereby providing an excellent place for  %D hacks, patches, extensions and new features. diff --git a/tex/context/base/context-version.pdf b/tex/context/base/context-version.pdf Binary files differindex bb3c1a555..7e082ed9e 100644 --- a/tex/context/base/context-version.pdf +++ b/tex/context/base/context-version.pdf diff --git a/tex/context/base/context.mkiv b/tex/context/base/context.mkiv index f92d65902..60e0f18c5 100644 --- a/tex/context/base/context.mkiv +++ b/tex/context/base/context.mkiv @@ -28,7 +28,7 @@  %D up and the dependencies are more consistent.  
\edef\contextformat {\jobname} -\edef\contextversion{2014.07.04 15:55} +\edef\contextversion{2014.07.06 22:50}  \edef\contextkind   {beta}  %D For those who want to use this: @@ -112,9 +112,9 @@  \loadmarkfile{supp-dir} -\loadmarkfile{char-ini} -\loadmarkfile{char-utf} -\loadmarkfile{char-act} +\loadmarkfile{char-utf} % generic code (i.e. not much tex) ... could become unic-ini +\loadmarkfile{char-ini} % tex / context specific +\loadmarkfile{char-act} % even more specific  \loadmarkfile{mult-ini}  \loadmarkfile{mult-sys} diff --git a/tex/context/base/font-enc.lua b/tex/context/base/font-enc.lua index 5305f0736..2e8b722de 100644 --- a/tex/context/base/font-enc.lua +++ b/tex/context/base/font-enc.lua @@ -8,6 +8,7 @@ if not modules then modules = { } end modules ['font-enc'] = {  -- this module is obsolete +local next = next  local match, gmatch, gsub = string.match, string.gmatch, string.gsub  local setmetatableindex = table.setmetatableindex @@ -125,7 +126,12 @@ function encodings.make_unicode_vector()          end      end      for name, code in next, characters.synonyms do -        vector[code], hash[name] = name, code +        if not vector[code] then +            vector[code] = name +        end +        if not hash[name] then +            hash[name]   = code +        end      end      return containers.write(encodings.cache, 'unicode', { name='unicode', tag='unicode', vector=vector, hash=hash })  end diff --git a/tex/context/base/font-pre.mkiv b/tex/context/base/font-pre.mkiv index fc6eb289e..cb5b193f6 100644 --- a/tex/context/base/font-pre.mkiv +++ b/tex/context/base/font-pre.mkiv @@ -100,14 +100,14 @@     features=no]  \definefontfeature -  [semetic-complete] +  [semitic-complete]    [mode=node,analyze=yes,language=dflt,ccmp=yes,     init=yes,medi=yes,fina=yes,isol=yes,     mark=yes,mkmk=yes,kern=yes,curs=yes,     liga=yes,dlig=yes,rlig=yes,clig=yes,calt=yes]  \definefontfeature -  [semetic-simple] +  [semitic-simple]    
[mode=node,analyze=yes,language=dflt,ccmp=yes,     init=yes,medi=yes,fina=yes,isol=yes,     mark=yes,mkmk=yes,kern=yes,curs=yes, @@ -115,22 +115,22 @@  \definefontfeature    [arabic] -  [semetic-complete] +  [semitic-complete]    [script=arab]  \definefontfeature    [hebrew] -  [semetic-complete] +  [semitic-complete]    [script=hebr]  \definefontfeature    [simplearabic] -  [semetic-simple] +  [semitic-simple]    [script=arab]  \definefontfeature    [simplehebrew] -  [semetic-simple] +  [semitic-simple]    [script=hebr]  % \definefont [DevaOne] [file:chandas.ttf*devanagari-one at 12pt] diff --git a/tex/context/base/l-lpeg.lua b/tex/context/base/l-lpeg.lua index c203d8044..79e75a7b7 100644 --- a/tex/context/base/l-lpeg.lua +++ b/tex/context/base/l-lpeg.lua @@ -897,17 +897,35 @@ end  function lpeg.utfchartabletopattern(list) -- goes to util-lpg      local tree = { }      local hash = { } -    for i=1,#list do -        local t = tree -        for c in gmatch(list[i],".") do -            local tc = t[c] -            if not tc then -                tc = { } -                t[c] = tc +    local n = #list +    if n == 0 then +        -- we could always use this branch +        for s in next, list do +            local t = tree +            for c in gmatch(s,".") do +                local tc = t[c] +                if not tc then +                    tc = { } +                    t[c] = tc +                end +                t = tc +            end +            hash[t] = s +        end +    else +        for i=1,n do +            local t = tree +            local s = list[i] +            for c in gmatch(s,".") do +                local tc = t[c] +                if not tc then +                    tc = { } +                    t[c] = tc +                end +                t = tc              end -            t = tc +            hash[t] = s          end -        hash[t] = list[i]      end      return make(tree,hash)  end diff --git a/tex/context/base/publ-aut.lua 
b/tex/context/base/publ-aut.lua index b35af1bcc..0167d66e7 100644 --- a/tex/context/base/publ-aut.lua +++ b/tex/context/base/publ-aut.lua @@ -233,6 +233,7 @@ local function the_initials(initials,symbol)  end  local ctx_btxsetconcat        = context.btxsetconcat +local ctx_btxsetauthorindex   = context.btxsetauthorindex  local ctx_btxsetoverflow      = context.btxsetoverflow  local ctx_btxsetinitials      = context.btxsetinitials  local ctx_btxsetfirstnames    = context.btxsetfirstnames @@ -248,6 +249,56 @@ local ctx_btxstopauthor       = context.btxstopauthor  local concatstate = publications.concatstate  local f_invalid   = formatters["<invalid %s: %s>"] +local currentauthordata   = nil +local currentauthorsymbol = nil + +local manipulators       = typesetters.manipulators +local splitmanipulation  = manipulators.splitspecification +local applymanipulation  = manipulators.applyspecification +local manipulatormethods = manipulators.methods + +local function value(i,field) +    if currentauthordata then +        local entry = currentauthordata[i] +        if entry then +            local value = entry[field] +            if value and #value > 0 then +                return value +            end +        end +    end +end + +function commands.btx_a_i(i) local v = value(i,"initials")   if v then context(concat(the_initials(v,currentauthorsymbol or "."))) end end +function commands.btx_a_f(i) local v = value(i,"firstnames") if v then context(concat(v," ")) end end +function commands.btx_a_j(i) local v = value(i,"juniors")    if v then context(concat(v," ")) end end +function commands.btx_a_s(i) local v = value(i,"surnames")   if v then context(concat(v," ")) end end +function commands.btx_a_v(i) local v = value(i,"vons")       if v then context(concat(v," ")) end end + +function commands.btxauthorfield(i,field) +    if currentauthordata then +        local entry = currentauthordata[i] +        if entry then +            local manipulator, field = 
splitmanipulation(field) +            local value = entry[field] +            if not value or #value == 0 then +                -- value, no need for message +            elseif manipulator then +                for i=1,#value do +                    if i > 1 then +                        context(" ") -- symbol ? +                    end +                    context(applymanipulation(manipulator,value[i]) or value[i]) +                end +            elseif field == "initials" then +                context(concat(the_initials(value,currentauthorsymbol or "."))) +            else +                context(concat(value," ")) +            end +         end +    end +end +  function commands.btxauthor(dataset,tag,field,settings)      local ds = datasets[dataset]      if not ds then @@ -279,30 +330,32 @@ function commands.btxauthor(dataset,tag,field,settings)      if max > etallimit and etaldisplay < max then          max = etaldisplay      end +    currentauthordata   = split +    currentauthorsymbol = symbol      for i=1,max do -        ctx_btxstartauthor() -- i, max +        ctx_btxstartauthor(i,max)          ctx_btxsetconcat(concatstate(i,max))          ctx_btxsetauthorvariant(combiner)          local author = split[i]          local initials = author.initials -        if initials then -            ctx_btxsetinitials(concat(the_initials(initials,symbol)," ")) +        if initials and #initials > 0 then +            ctx_btxsetinitials() -- (concat(the_initials(initials,symbol)," "))          end          local firstnames = author.firstnames -        if firstnames then -            ctx_btxsetfirstnames(concat(firstnames," ")) +        if firstnames and #firstnames > 0 then +            ctx_btxsetfirstnames() -- (concat(firstnames," "))          end          local vons = author.vons -        if vons then -            ctx_btxsetvons(concat(vons," ")) +        if vons and #vons > 0 then +            ctx_btxsetvons() -- (concat(vons," "))          end          local surnames = 
author.surnames -        if surnames then -            ctx_btxsetsurnames(concat(surnames," ")) +        if surnames and #surnames > 0 then +            ctx_btxsetsurnames() -- (concat(surnames," "))          end          local juniors = author.juniors -        if juniors then -            ctx_btxsetjuniors(concat(juniors," ")) +        if juniors and #juniors > 0 then +            ctx_btxsetjuniors() -- (concat(juniors," "))          end          ctx_btxsetup(combiner)          ctx_btxstopauthor() @@ -317,6 +370,7 @@ end  -- pays off.  local compare  = sorters.comparers.basic -- (a,b) +-- local compare  = sorters.basicsorter -- (a,b)  local strip    = sorters.strip  local splitter = sorters.splitters.utf @@ -480,7 +534,7 @@ function authors.sorted(dataset,list,sorttype) -- experimental      if #valid == 0 or #valid ~= #list then          return list      else -        sorters.sort(valid,compare) +        sorters.sort(valid,function(a,b) return a ~= b and compare(a,b) == -1 end)          for i=1,#valid do              valid[i] = valid[i].index          end diff --git a/tex/context/base/publ-imp-author.mkvi b/tex/context/base/publ-imp-author.mkvi index e21353f63..29714ec03 100644 --- a/tex/context/base/publ-imp-author.mkvi +++ b/tex/context/base/publ-imp-author.mkvi @@ -24,28 +24,13 @@  % You can adapt these setups to your liking, for instance as: -% \startsetups btx:cite:author:normal -%     \fastsetup{btx:cite:author:concat} -%     \ifx\currentbtxfirstnames\empty \else -%         \begingroup -%             \bf -%             \currentbtxfirstnames -%         \endgroup -%         \btxcitevariantparameter{firstnamesep} -%     \fi -%     \ifx\currentbtxvons\empty \else -%         \currentbtxvons -%         \btxcitevariantparameter{vonsep} -%     \fi -%     \ifx\currentbtxsurnames\empty \else -%         \currentbtxsurnames -%         \ifx\currentbtxjuniors\empty \else -%             \btxcitevariantparameter{juniorsep} -%             \currentbtxjuniors -%         \fi -% 
    \fi -%     \fastsetup{btx:cite:author:etaltext} -% \stopsetups +% these can be used instead of the macros and they accept manipulator prefixes +% +% \currentbtxinitials   : \btxauthorfield{initials} +% \currentbtxfirstnames : \btxauthorfield{firstnames} +% \currentbtxvons       : \btxauthorfield{vons} +% \currentbtxsurnames   : \btxauthorfield{surnames} +% \currentbtxjuniors    : \btxauthorfield{juniors}  \startsetups \s!btx:\s!cite:\s!author:concat      \ifcase\currentbtxconcat \or \or @@ -174,7 +159,9 @@  \stopsetups  \startsetups \s!btx:\s!list:\s!author:etaltext -    \btxcitevariantparameter\c!etaltext +    \ifcase\currentbtxoverflow \else +        \btxlistvariantparameter\c!etaltext +    \fi  \stopsetups  \startsetups \s!btx:\s!list:\s!author:normal diff --git a/tex/context/base/publ-ini.mkiv b/tex/context/base/publ-ini.mkiv index 5f8e335fe..bf8c29363 100644 --- a/tex/context/base/publ-ini.mkiv +++ b/tex/context/base/publ-ini.mkiv @@ -318,12 +318,14 @@  % \let\btxsetdataset\setbtxdataset  % \let\btxsetentry  \setbtxentry -\def\btxfield   #1{\ctxcommand{btxfield("\currentbtxdataset","\currentbtxtag","#1")}} -\def\btxdetail  #1{\ctxcommand{btxdetail("\currentbtxdataset","\currentbtxtag","#1")}} -\def\btxflush   #1{\ctxcommand{btxflush("\currentbtxdataset","\currentbtxtag","#1")}} -\def\btxdoifelse#1{\ctxcommand{btxdoifelse("\currentbtxdataset","\currentbtxtag","#1")}} -\def\btxdoif    #1{\ctxcommand{btxdoif("\currentbtxdataset","\currentbtxtag","#1")}} -\def\btxdoifnot #1{\ctxcommand{btxdoifnot("\currentbtxdataset","\currentbtxtag","#1")}} +\def\btxfield      #1{\ctxcommand{btxfield("\currentbtxdataset","\currentbtxtag","#1")}} +\def\btxdetail     #1{\ctxcommand{btxdetail("\currentbtxdataset","\currentbtxtag","#1")}} +\def\btxauthorfield#1{\ctxcommand{btxauthorfield(\number\currentbtxauthorindex,"#1")}} +\def\btxflush      #1{\ctxcommand{btxflush("\currentbtxdataset","\currentbtxtag","#1")}} +\def\btxdoifelse   
#1{\ctxcommand{btxdoifelse("\currentbtxdataset","\currentbtxtag","#1")}} +\def\btxdoif       #1{\ctxcommand{btxdoif("\currentbtxdataset","\currentbtxtag","#1")}} +\def\btxdoifnot    #1{\ctxcommand{btxdoifnot("\currentbtxdataset","\currentbtxtag","#1")}} +  \let\btxsetup\fastsetup @@ -353,20 +355,41 @@  \let\currentbtxcombis       \empty    \unexpanded\def\btxsetcombis       {\def\currentbtxcombis}  \let\currentbtxdataset      \empty    \unexpanded\def\btxsetdataset      {\def\currentbtxdataset}  \let\currentbtxfirst        \empty    \unexpanded\def\btxsetfirst        {\def\currentbtxfirst} -\let\currentbtxfirstnames   \empty    \unexpanded\def\btxsetfirstnames   {\def\currentbtxfirstnames} -\let\currentbtxinitials     \empty    \unexpanded\def\btxsetinitials     {\def\currentbtxinitials}  \let\currentbtxinternal     \empty    \unexpanded\def\btxsetinternal     {\def\currentbtxinternal} -\let\currentbtxjuniors      \empty    \unexpanded\def\btxsetjuniors      {\def\currentbtxjuniors}  \let\currentbtxlanguage     \empty    \unexpanded\def\btxsetlanguage     {\def\currentbtxlanguage}  \let\currentbtxsecond       \empty    \unexpanded\def\btxsetsecond       {\def\currentbtxsecond} -\let\currentbtxsurnames     \empty    \unexpanded\def\btxsetsurnames     {\def\currentbtxsurnames}  \let\currentbtxtag          \empty    \unexpanded\def\btxsettag          {\def\currentbtxtag} -\let\currentbtxvons         \empty    \unexpanded\def\btxsetvons         {\def\currentbtxvons}  \let\currentbtxauthorvariant\v!normal \unexpanded\def\btxsetauthorvariant{\def\currentbtxauthorvariant} -\newconstant\currentbtxoverflow \unexpanded\def\btxsetoverflow#1{\currentbtxoverflow#1\relax} -\newconstant\currentbtxconcat   \unexpanded\def\btxsetconcat  #1{\currentbtxconcat  #1\relax} -\newconstant\currentbtxcount    \unexpanded\def\btxsetcount   #1{\currentbtxcount   #1\relax} +%let\currentbtxfirstnames   \empty    \unexpanded\def\btxsetfirstnames   {\def\currentbtxfirstnames} 
+%let\currentbtxinitials     \empty    \unexpanded\def\btxsetinitials     {\def\currentbtxinitials} +%let\currentbtxjuniors      \empty    \unexpanded\def\btxsetjuniors      {\def\currentbtxjuniors} +%let\currentbtxsurnames     \empty    \unexpanded\def\btxsetsurnames     {\def\currentbtxsurnames} +%let\currentbtxvons         \empty    \unexpanded\def\btxsetvons         {\def\currentbtxvons} + +%unexpanded\def\getcurrentbtxfirstnames{\ctxcommand{btxauthorfield("firstnames")} +%unexpanded\def\getcurrentbtxinitials  {\ctxcommand{btxauthorfield("initials")} +%unexpanded\def\getcurrentbtxjuniors   {\ctxcommand{btxauthorfield("juniors")} +%unexpanded\def\getcurrentbtxsurnames  {\ctxcommand{btxauthorfield("surnames")} +%unexpanded\def\getcurrentbtxvons      {\ctxcommand{btxauthorfield("vons")} + +\unexpanded\def\currentbtxfirstnames_indeed{\ctxcommand{btx_a_f(\number\currentbtxauthorindex)}} +\unexpanded\def\currentbtxinitials_indeed  {\ctxcommand{btx_a_i(\number\currentbtxauthorindex)}} +\unexpanded\def\currentbtxjuniors_indeed   {\ctxcommand{btx_a_j(\number\currentbtxauthorindex)}} +\unexpanded\def\currentbtxsurnames_indeed  {\ctxcommand{btx_a_s(\number\currentbtxauthorindex)}} +\unexpanded\def\currentbtxvons_indeed      {\ctxcommand{btx_a_v(\number\currentbtxauthorindex)}} + +\let\currentbtxfirstnames   \empty    \unexpanded\def\btxsetfirstnames{\let\currentbtxfirstnames\currentbtxfirstnames_indeed} +\let\currentbtxinitials     \empty    \unexpanded\def\btxsetinitials  {\let\currentbtxinitials  \currentbtxinitials_indeed  } +\let\currentbtxjuniors      \empty    \unexpanded\def\btxsetjuniors   {\let\currentbtxjuniors   \currentbtxjuniors_indeed   } +\let\currentbtxsurnames     \empty    \unexpanded\def\btxsetsurnames  {\let\currentbtxsurnames  \currentbtxsurnames_indeed  } +\let\currentbtxvons         \empty    \unexpanded\def\btxsetvons      {\let\currentbtxvons      \currentbtxvons_indeed      } + +\newconstant\currentbtxoverflow    \unexpanded\def\btxsetoverflow   
#1{\currentbtxoverflow   #1\relax} +\newconstant\currentbtxconcat      \unexpanded\def\btxsetconcat     #1{\currentbtxconcat     #1\relax} +\newconstant\currentbtxcount       \unexpanded\def\btxsetcount      #1{\currentbtxcount      #1\relax} +\newconstant\currentbtxauthorindex %unexpanded\def\btxsetauthorindex#1{\currentbtxauthorindex#1\relax} % passed directly +\newconstant\currentbtxauthorcount %unexpanded\def\btxsetauthorcount#1{\currentbtxauthorcount#1\relax} % passed directly  \def\currentbtxauthorvariant{normal} @@ -381,17 +404,17 @@     \let\currentbtxdataset  \empty}  \unexpanded\def\btxcitereset % check for less .. not all resets needed -  {\let        \currentbtxfirst    \empty -   \let        \currentbtxsecond   \empty -   \let        \currentbtxinternal \empty -   \let        \currentbtxbacklink \empty -   \let        \currentbtxbacktrace\empty % not used here -   \let        \currentbtxlanguage \empty -   \let        \currentbtxdataset  \empty -   \let        \currentbtxtag      \empty -   \setconstant\currentbtxoverflow \zerocount -   \setconstant\currentbtxconcat   \zerocount -   \setconstant\currentbtxcount    \zerocount} +  {\let        \currentbtxfirst      \empty +   \let        \currentbtxsecond     \empty +   \let        \currentbtxinternal   \empty +   \let        \currentbtxbacklink   \empty +   \let        \currentbtxbacktrace  \empty % not used here +   \let        \currentbtxlanguage   \empty +   \let        \currentbtxdataset    \empty +   \let        \currentbtxtag        \empty +   \setconstant\currentbtxoverflow   \zerocount +   \setconstant\currentbtxconcat     \zerocount +   \setconstant\currentbtxcount      \zerocount}  %D Tracing @@ -701,8 +724,13 @@     })}%     \endgroup} -\unexpanded\def\btxstartauthor{\begingroup} -\unexpanded\def\btxstopauthor {\endgroup} +\unexpanded\def\btxstartauthor#1#2% +  {\begingroup +   \currentbtxauthorindex#1\relax +   \currentbtxauthorcount#2\relax} + +\unexpanded\def\btxstopauthor +  {\endgroup}  
\unexpanded\def\btxciteauthorsetup#1{\fastsetup{\s!btx:\s!cite:\s!author:#1}}  \unexpanded\def\btxlistauthorsetup#1{\fastsetup{\s!btx:\s!list:\s!author:#1}} @@ -950,16 +978,6 @@  \unexpanded\def\btxcitesetup#1%    {\fastsetup{\s!btx:\s!cite:#1}} % no \btxcitereset as we loose dataset and such -\unexpanded\def\btxsetfirst      {\def\currentbtxfirst} -\unexpanded\def\btxsetsecond     {\def\currentbtxsecond} -\unexpanded\def\btxsettag        {\def\currentbtxtag} -\unexpanded\def\btxsetdataset    {\def\currentbtxdataset} -%unexpanded\def\btxsetlanguage   {\def\currentbtxlanguage} -\unexpanded\def\btxsetinternal   {\def\currentbtxinternal} -\unexpanded\def\btxsetcount    #1{\setconstant\currentbtxcount   #1\relax} -\unexpanded\def\btxsetconcat   #1{\setconstant\currentbtxconcat  #1\relax} -\unexpanded\def\btxsetoverflow #1{\setconstant\currentbtxoverflow#1\relax} -  \unexpanded\def\btxstartsubcite#1% #1 can go    {\begingroup     \btxcitereset % todo: limited set diff --git a/tex/context/base/regi-ini.lua b/tex/context/base/regi-ini.lua index 63f45a0b1..9484db7c7 100644 --- a/tex/context/base/regi-ini.lua +++ b/tex/context/base/regi-ini.lua @@ -390,7 +390,7 @@ function regimes.cleanup(regime,str)                      mapping[split] = v                  end              end -            p = Cs((lpeg.utfchartabletopattern(table.keys(mapping))/mapping+P(1))^0) +            p = Cs((lpeg.utfchartabletopattern(mapping)/mapping+P(1))^0)          else              p = false          end diff --git a/tex/context/base/sort-ini.lua b/tex/context/base/sort-ini.lua index d1eaacd15..ab6ad0649 100644 --- a/tex/context/base/sort-ini.lua +++ b/tex/context/base/sort-ini.lua @@ -53,6 +53,7 @@ have language etc properties that then can be used.</p>  local gsub, rep, sub, sort, concat, tohash, format = string.gsub, string.rep, string.sub, table.sort, table.concat, table.tohash, string.format  local utfbyte, utfchar, utfcharacters, utfvalues = utf.byte, utf.char, utf.characters, utf.values  
local next, type, tonumber, rawget, rawset = next, type, tonumber, rawget, rawset +local P, Cs, R, S, lpegmatch = lpeg.P, lpeg.Cs, lpeg.R, lpeg.S, lpeg.match  local allocate          = utilities.storage.allocate  local setmetatableindex = table.setmetatableindex @@ -367,6 +368,8 @@ end  -- tricky: { 0, 0, 0 } vs { 0, 0, 0, 0 } => longer wins and mm, pm, zm can have them +-- inlining and checking first slot first doesn't speed up (the 400K complex author sort) +  local function basicsort(sort_a,sort_b)      if sort_a and sort_b then          local na = #sort_a @@ -374,12 +377,14 @@ local function basicsort(sort_a,sort_b)          if na > nb then              na = nb          end -        for i=1,na do -            local ai, bi = sort_a[i], sort_b[i] -            if ai > bi then -                return  1 -            elseif ai < bi then -                return -1 +        if na > 0 then +            for i=1,na do +                local ai, bi = sort_a[i], sort_b[i] +                if ai > bi then +                    return  1 +                elseif ai < bi then +                    return -1 +                end              end          end      end @@ -389,6 +394,10 @@ end  -- todo: compile compare function  local function basic(a,b) -- trace ea and eb +    if a == b then +        -- hashed (shared) entries +        return 0 +    end      local ea, eb = a.split, b.split      local na, nb = #ea, #eb      if na == 0 and nb == 0 then @@ -484,25 +493,59 @@ function sorters.basicsorter(a,b)      return basic(a,b) == -1  end +-- local function numify(s) +--     s = digitsoffset + tonumber(s) -- alternatively we can create range or maybe just hex numbers +--     if s > digitsmaximum then +--         s = digitsmaximum +--     end +--     return utfchar(s) +-- end +-- +-- function sorters.strip(str) -- todo: only letters and such +--     if str and str ~= "" then +--         -- todo: make a decent lpeg +--         str = gsub(str,"\\[\"\'~^`]*","") -- \"e -- hm, too 
greedy +--         str = gsub(str,"\\%S*","") -- the rest +--         str = gsub(str,"%s","\001") -- can be option +--         str = gsub(str,"[%s%[%](){}%$\"\']*","") -- %s already done +--         if digits == v_numbers then +--             str = gsub(str,"(%d+)",numify) -- sort numbers properly +--         end +--         return str +--     else +--         return "" +--     end +-- end +  local function numify(s) -    s = digitsoffset + tonumber(s) -- alternatively we can create range -    if s > digitsmaximum then -        s = digitsmaximum +    if digits == v_numbers then +        return s +    else +        s = digitsoffset + tonumber(s) -- alternatively we can create range +        if s > digitsmaximum then +            s = digitsmaximum +        end +        return utfchar(s)      end -    return utfchar(s) +end + +local pattern = nil + +local function prepare() +    pattern = Cs( ( +        characters.tex.toutfpattern() +      + lpeg.patterns.whitespace / "\000" +      + (P("\\") * P(1) * R("az","AZ")^0) / "" +      + S("[](){}$\"'") / "" +      + R("09")^1 / numify +      + P(1) +    )^0 ) +    return pattern  end  function sorters.strip(str) -- todo: only letters and such      if str and str ~= "" then -        -- todo: make a decent lpeg -        str = gsub(str,"\\[\"\'~^`]*","") -- \"e -- hm, too greedy -        str = gsub(str,"\\%S*","") -- the rest -        str = gsub(str,"%s","\001") -- can be option -        str = gsub(str,"[%s%[%](){}%$\"\']*","") -- %s already done -        if digits == v_numbers then -            str = gsub(str,"(%d+)",numify) -- sort numbers properly -        end -        return str +        return lpegmatch(pattern or prepare(),str)      else          return ""      end diff --git a/tex/context/base/status-files.pdf b/tex/context/base/status-files.pdf Binary files differindex 5bfd7eade..85d00dfa0 100644 --- a/tex/context/base/status-files.pdf +++ b/tex/context/base/status-files.pdf diff --git a/tex/context/base/status-lua.pdf 
b/tex/context/base/status-lua.pdf Binary files differindex 1da58153a..b0160abbc 100644 --- a/tex/context/base/status-lua.pdf +++ b/tex/context/base/status-lua.pdf diff --git a/tex/context/base/x-asciimath.lua b/tex/context/base/x-asciimath.lua index b3202daa9..0849b42a5 100644 --- a/tex/context/base/x-asciimath.lua +++ b/tex/context/base/x-asciimath.lua @@ -829,9 +829,9 @@ local m_right = {  }  local p_left = -    lpeg.utfchartabletopattern(keys(m_left)) / m_left +    lpeg.utfchartabletopattern(m_left) / m_left  local p_right = -    lpeg.utfchartabletopattern(keys(m_right)) / m_right +    lpeg.utfchartabletopattern(m_right) / m_right  -- special cases diff --git a/tex/generic/context/luatex/luatex-fonts-merged.lua b/tex/generic/context/luatex/luatex-fonts-merged.lua index 52a65ea57..2f26be70e 100644 --- a/tex/generic/context/luatex/luatex-fonts-merged.lua +++ b/tex/generic/context/luatex/luatex-fonts-merged.lua @@ -1,6 +1,6 @@  -- merged file : luatex-fonts-merged.lua  -- parent file : luatex-fonts.lua --- merge date  : 07/04/14 15:55:31 +-- merge date  : 07/06/14 22:50:12  do -- begin closure to overcome local limits and interference @@ -665,17 +665,34 @@ end  function lpeg.utfchartabletopattern(list)     local tree={}    local hash={} -  for i=1,#list do -    local t=tree -    for c in gmatch(list[i],".") do -      local tc=t[c] -      if not tc then -        tc={} -        t[c]=tc +  local n=#list +  if n==0 then +    for s in next,list do +      local t=tree +      for c in gmatch(s,".") do +        local tc=t[c] +        if not tc then +          tc={} +          t[c]=tc +        end +        t=tc +      end +      hash[t]=s +    end +  else +    for i=1,n do +      local t=tree +      local s=list[i] +      for c in gmatch(s,".") do +        local tc=t[c] +        if not tc then +          tc={} +          t[c]=tc +        end +        t=tc        end -      t=tc +      hash[t]=s      end -    hash[t]=list[i]    end    return make(tree,hash)  end  | 
