diff options
Diffstat (limited to 'tex/context/base/mkiv/char-tex.lua')
-rw-r--r-- | tex/context/base/mkiv/char-tex.lua | 88 |
1 files changed, 64 insertions, 24 deletions
diff --git a/tex/context/base/mkiv/char-tex.lua b/tex/context/base/mkiv/char-tex.lua index 3e0d02bc1..f123f0092 100644 --- a/tex/context/base/mkiv/char-tex.lua +++ b/tex/context/base/mkiv/char-tex.lua @@ -6,9 +6,8 @@ if not modules then modules = { } end modules ['char-tex'] = { license = "see context related readme files" } - local lpeg = lpeg -local next, type = next, type +local tonumber, next, type = tonumber, next, type local format, find, gmatch = string.format, string.find, string.gmatch local utfchar, utfbyte = utf.char, utf.byte local concat, tohash = table.concat, table.tohash @@ -329,7 +328,7 @@ local ligaturemapping = allocate { -- } -- -- function texcharacters.toutf(str,strip) --- if not find(str,"\\") then +-- if not find(str,"\\",1,true) then -- return str -- elseif strip then -- return lpegmatch(both_1,str) @@ -381,7 +380,7 @@ end function texcharacters.toutf(str,strip) if str == "" then return str - elseif not find(str,"\\") then + elseif not find(str,"\\",1,true) then return str -- elseif strip then else @@ -430,6 +429,9 @@ local texsetcatcode = tex.setcatcode local contextsprint = context.sprint local ctxcatcodes = catcodes.numbers.ctxcatcodes +local texsetmacro = tokens.setters.macro +local texsetchar = tokens.setters.char + function texcharacters.defineaccents() local ctx_dodefineaccentcommand = context.dodefineaccentcommand local ctx_dodefineaccent = context.dodefineaccent @@ -485,12 +487,38 @@ tex.uprint = commands.utfchar -- in contect we don't use lc and uc codes (in fact in luatex we should have a hf code) -- so at some point we might drop this -local forbidden = tohash { -- at least now - 0x00A0, - 0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005, 0x2006, 0x2007, 0x2008, 0x2009, 0x200A, 0x200B, 0x200C, 0x200D, - 0x202F, - 0x205F, - -- 0xFEFF, +-- The following get set at the TeX end: + +local forbidden = tohash { + 0x000A0, -- zs nobreakspace <self> + 0x000AD, -- cf softhyphen <self> + -- 0x00600, -- cf arabicnumber <self> + -- 0x00601, -- cf arabicsanah <self> + -- 0x00602, -- cf arabicfootnotemarker <self> + -- 0x00603, -- cf arabicsafha <self> + -- 0x00604, -- cf arabicsamvat <self> + -- 0x00605, -- cf arabicnumberabove <self> + -- 0x0061C, -- cf arabiclettermark <self> + -- 0x006DD, -- cf arabicendofayah <self> + -- 0x008E2, -- cf arabicdisputedendofayah <self> + 0x02000, -- zs enquad <self> + 0x02001, -- zs emquad <self> + 0x02002, -- zs enspace \kern .5\emwidth + 0x02003, -- zs emspace \hskip \emwidth + 0x02004, -- zs threeperemspace <self> + 0x02005, -- zs fourperemspace <self> + 0x02006, -- zs sixperemspace <self> + 0x02007, -- zs figurespace <self> + 0x02008, -- zs punctuationspace <self> + 0x02009, -- zs breakablethinspace <self> + 0x0200A, -- zs hairspace <self> + 0x0200B, -- cf zerowidthspace <self> + 0x0200C, -- cf zwnj <self> + 0x0200D, -- cf zwj <self> + 0x0202F, -- zs narrownobreakspace <self> + 0x0205F, -- zs medspace \textormathspace +\medmuskip 2 + -- 0x03000, -- zs ideographicspace <self> + -- 0x0FEFF, -- cf zerowidthnobreakspace \penalty \plustenthousand \kern \zeropoint } local csletters = characters.csletters -- also a signal that we have initialized @@ -549,18 +577,15 @@ if not csletters then if is_character[category] then if chr.unicodeslot < 128 then if isletter then - -- setmacro local c = utfchar(u) - contextsprint(ctxcatcodes,format("\\def\\%s{%s}",contextname,c)) -- has no s + texsetmacro(contextname,c) csletters[c] = u else - -- setchar - contextsprint(ctxcatcodes,format("\\chardef\\%s=%s",contextname,u)) -- has no s + texsetchar(contextname,u) end else - -- setmacro local c = utfchar(u) - contextsprint(ctxcatcodes,format("\\def\\%s{%s}",contextname,c)) -- has no s + texsetmacro(contextname,c) if isletter and u >= 32 and u <= 65536 then csletters[c] = u end @@ -585,9 +610,10 @@ if not csletters then end -- elseif is_command[category] and not forbidden[u] then - -- set - contextsprint("{\\catcode",u,"=13\\unexpanded\\gdef ",utfchar(u),"{\\"..contextname,"}}") - activated[#activated+1] = u + -- contextsprint("{\\catcode",u,"=13\\unexpanded\\gdef ",utfchar(u),"{\\",contextname,"}}") + -- activated[#activated+1] = u + local c = utfchar(u) + texsetmacro(contextname,c) elseif is_mark[category] then texsetlccode(u,u,u) -- for hyphenation end @@ -623,11 +649,25 @@ if not csletters then -- this slows down format generation by over 10 percent for k, v in next, blocks do if v.catcode == "letter" then - for u=v.first,v.last do - csletters[utfchar(u)] = u - -- - -- texsetlccode(u,u,u) -- self self - -- + local first = v.first + local last = v.last + local gaps = v.gaps + if first and last then + for u=first,last do + csletters[utfchar(u)] = u + -- + -- texsetlccode(u,u,u) -- self self + -- + end + end + if gaps then + for i=1,#gaps do + local u = gaps[i] + csletters[utfchar(u)] = u + -- + -- texsetlccode(u,u,u) -- self self + -- + end end end end |