From ccdd2d9e98ac3bbf794eefd5a42ed063832a61b3 Mon Sep 17 00:00:00 2001 From: Hans Hagen Date: Tue, 7 Oct 2014 11:14:00 +0530 Subject: beta 2014.10.07 11:14 --- doc/context/manuals/allkind/mkiv-publications.pdf | Bin 457944 -> 457924 bytes scripts/context/lua/mtx-epub.lua | 5 +- scripts/context/lua/mtxrun.lua | 455 +++++++------ scripts/context/stubs/mswin/mtxrun.lua | 455 +++++++------ scripts/context/stubs/unix/mtxrun | 455 +++++++------ scripts/context/stubs/win64/mtxrun.lua | 455 +++++++------ tex/context/base/attr-lay.lua | 1 + tex/context/base/back-exp.lua | 20 +- tex/context/base/back-exp.mkiv | 23 +- tex/context/base/back-pdf.mkiv | 2 +- tex/context/base/char-def.lua | 12 + tex/context/base/cont-new.mkiv | 119 +++- tex/context/base/context-version.pdf | Bin 4391 -> 4388 bytes tex/context/base/context.mkiv | 2 +- tex/context/base/data-tex.lua | 8 +- tex/context/base/file-job.lua | 16 +- tex/context/base/font-afm.lua | 78 ++- tex/context/base/font-agl.lua | 2 + tex/context/base/font-con.lua | 84 ++- tex/context/base/font-ctx.lua | 34 +- tex/context/base/font-enh.lua | 28 +- tex/context/base/font-ext.lua | 10 +- tex/context/base/font-gds.lua | 2 +- tex/context/base/font-hsh.lua | 32 +- tex/context/base/font-ini.mkvi | 6 +- tex/context/base/font-map.lua | 222 +++--- tex/context/base/font-mis.lua | 2 +- tex/context/base/font-nod.lua | 14 +- tex/context/base/font-otb.lua | 94 +-- tex/context/base/font-otf.lua | 354 ++++++++-- tex/context/base/font-otn.lua | 25 +- tex/context/base/font-otp.lua | 6 + tex/context/base/font-tfm.lua | 5 + tex/context/base/grph-epd.lua | 3 + tex/context/base/grph-epd.mkiv | 2 +- tex/context/base/grph-inc.lua | 24 +- tex/context/base/l-lpeg.lua | 75 ++ tex/context/base/l-md5.lua | 64 +- tex/context/base/l-table.lua | 2 +- tex/context/base/l-unicode.lua | 623 +++++++++-------- tex/context/base/lang-def.mkiv | 3 +- tex/context/base/lang-txt.lua | 59 +- tex/context/base/lang-url.mkiv | 28 + tex/context/base/lpdf-ano.lua | 134 ++-- 
tex/context/base/lpdf-epa.lua | 228 ++++++- tex/context/base/lpdf-epd.lua | 752 +++++++++++++++----- tex/context/base/lpdf-fld.lua | 6 +- tex/context/base/lpdf-ini.lua | 495 ++++++++------ tex/context/base/m-pstricks.mkii | 2 +- tex/context/base/math-fbk.lua | 22 +- tex/context/base/math-frc.lua | 8 +- tex/context/base/math-frc.mkiv | 94 ++- tex/context/base/math-ini.mkiv | 20 +- tex/context/base/math-rad.mkvi | 2 +- tex/context/base/math-stc.mkvi | 18 + tex/context/base/math-tag.lua | 3 +- tex/context/base/mult-de.mkii | 2 +- tex/context/base/mult-def.lua | 4 +- tex/context/base/mult-def.mkiv | 5 + tex/context/base/mult-en.mkii | 2 +- tex/context/base/mult-fr.mkii | 2 +- tex/context/base/mult-it.mkii | 2 +- tex/context/base/mult-nl.mkii | 2 +- tex/context/base/mult-pe.mkii | 2 +- tex/context/base/mult-ro.mkii | 2 +- tex/context/base/node-fin.lua | 40 +- tex/context/base/page-sid.mkiv | 36 +- tex/context/base/publ-dat.lua | 2 +- tex/context/base/publ-imp-apa.mkvi | 265 +++++--- tex/context/base/publ-ini.lua | 8 +- tex/context/base/publ-ini.mkiv | 13 +- tex/context/base/regi-ini.lua | 34 +- tex/context/base/regi-pdfdoc.lua | 26 + tex/context/base/scrp-ini.lua | 5 +- tex/context/base/status-files.pdf | Bin 24767 -> 24694 bytes tex/context/base/status-lua.pdf | Bin 325265 -> 333462 bytes tex/context/base/strc-bkm.lua | 295 ++++++-- tex/context/base/strc-bkm.mkiv | 32 + tex/context/base/strc-doc.lua | 11 +- tex/context/base/strc-ref.lua | 2 + tex/context/base/strc-ren.mkiv | 10 + tex/context/base/supp-box.lua | 17 +- tex/context/base/supp-box.mkiv | 18 +- tex/context/base/syst-ini.mkiv | 2 +- tex/context/base/typo-dig.lua | 7 +- tex/context/base/typo-tal.lua | 10 +- tex/context/base/util-sta.lua | 22 +- tex/context/base/util-tpl.lua | 20 +- tex/context/base/x-asciimath.lua | 21 +- tex/context/base/x-asciimath.mkiv | 17 +- tex/context/base/x-mathml.xsd | 6 +- tex/context/fonts/treatments.lfg | 88 +-- tex/context/interface/keys-cs.xml | 2 +- 
tex/context/interface/keys-de.xml | 2 +- tex/context/interface/keys-en.xml | 2 +- tex/context/interface/keys-fr.xml | 2 +- tex/context/interface/keys-it.xml | 2 +- tex/context/interface/keys-nl.xml | 2 +- tex/context/interface/keys-pe.xml | 2 +- tex/context/interface/keys-ro.xml | 2 +- tex/generic/context/luatex/luatex-basics-gen.lua | 5 + tex/generic/context/luatex/luatex-fonts-merged.lua | 753 +++++++++++++++------ tex/generic/context/luatex/luatex-fonts-otn.lua | 25 +- 103 files changed, 5046 insertions(+), 2476 deletions(-) create mode 100644 tex/context/base/regi-pdfdoc.lua diff --git a/doc/context/manuals/allkind/mkiv-publications.pdf b/doc/context/manuals/allkind/mkiv-publications.pdf index b00659d04..74bcb637e 100644 Binary files a/doc/context/manuals/allkind/mkiv-publications.pdf and b/doc/context/manuals/allkind/mkiv-publications.pdf differ diff --git a/scripts/context/lua/mtx-epub.lua b/scripts/context/lua/mtx-epub.lua index fa5a85134..6d8dfa63f 100644 --- a/scripts/context/lua/mtx-epub.lua +++ b/scripts/context/lua/mtx-epub.lua @@ -23,9 +23,8 @@ if not modules then modules = { } end modules ['mtx-epub'] = { -- OEBPS -- content.opf -- toc.ncx --- Images --- Styles --- Text +-- images +-- styles -- mimetype -- todo: diff --git a/scripts/context/lua/mtxrun.lua b/scripts/context/lua/mtxrun.lua index fcfdc1f17..33a8912df 100644 --- a/scripts/context/lua/mtxrun.lua +++ b/scripts/context/lua/mtxrun.lua @@ -437,7 +437,7 @@ do -- create closure to overcome 200 locals limit package.loaded["l-lpeg"] = package.loaded["l-lpeg"] or true --- original size: 32003, stripped down to: 16772 +-- original size: 33805, stripped down to: 18228 if not modules then modules={} end modules ['l-lpeg']={ version=1.001, @@ -498,6 +498,8 @@ patterns.utfbom_16_le=utfbom_16_le patterns.utfbom_8=utfbom_8 patterns.utf_16_be_nl=P("\000\r\000\n")+P("\000\r")+P("\000\n") patterns.utf_16_le_nl=P("\r\000\n\000")+P("\r\000")+P("\n\000") 
+patterns.utf_32_be_nl=P("\000\000\000\r\000\000\000\n")+P("\000\000\000\r")+P("\000\000\000\n") +patterns.utf_32_le_nl=P("\r\000\000\000\n\000\000\000")+P("\r\000\000\000")+P("\n\000\000\000") patterns.utf8one=R("\000\127") patterns.utf8two=R("\194\223")*utf8next patterns.utf8three=R("\224\239")*utf8next*utf8next @@ -1080,6 +1082,65 @@ local case_2=period*(digit-trailingzeros)^1*(trailingzeros/"") local number=digit^1*(case_1+case_2) local stripper=Cs((number+1)^0) lpeg.patterns.stripzeros=stripper +local byte_to_HEX={} +local byte_to_hex={} +local byte_to_dec={} +local hex_to_byte={} +for i=0,255 do + local H=format("%02X",i) + local h=format("%02x",i) + local d=format("%03i",i) + local c=char(i) + byte_to_HEX[c]=H + byte_to_hex[c]=h + byte_to_dec[c]=d + hex_to_byte[h]=c + hex_to_byte[H]=c +end +local hextobyte=P(2)/hex_to_byte +local bytetoHEX=P(1)/byte_to_HEX +local bytetohex=P(1)/byte_to_hex +local bytetodec=P(1)/byte_to_dec +local hextobytes=Cs(hextobyte^0) +local bytestoHEX=Cs(bytetoHEX^0) +local bytestohex=Cs(bytetohex^0) +local bytestodec=Cs(bytetodec^0) +patterns.hextobyte=hextobyte +patterns.bytetoHEX=bytetoHEX +patterns.bytetohex=bytetohex +patterns.bytetodec=bytetodec +patterns.hextobytes=hextobytes +patterns.bytestoHEX=bytestoHEX +patterns.bytestohex=bytestohex +patterns.bytestodec=bytestodec +function string.toHEX(s) + if not s or s=="" then + return s + else + return lpegmatch(bytestoHEX,s) + end +end +function string.tohex(s) + if not s or s=="" then + return s + else + return lpegmatch(bytestohex,s) + end +end +function string.todec(s) + if not s or s=="" then + return s + else + return lpegmatch(bytestodec,s) + end +end +function string.tobytes(s) + if not s or s=="" then + return s + else + return lpegmatch(hextobytes,s) + end +end end -- of closure @@ -1212,7 +1273,7 @@ do -- create closure to overcome 200 locals limit package.loaded["l-table"] = package.loaded["l-table"] or true --- original size: 33477, stripped down to: 21843 +-- original 
size: 33499, stripped down to: 21844 if not modules then modules={} end modules ['l-table']={ version=1.001, @@ -1259,7 +1320,7 @@ local function compare(a,b) if ta==tb then return a0 then - now=(more-0xD800)*0x400+(now-0xDC00)+0x10000 - more=0 - r=r+1 - result[r]=utfchar(now) - elseif now>=0xD800 and now<=0xDBFF then - more=now - else - r=r+1 - result[r]=utfchar(now) - end - end - end - t[i]=concat(result,"",1,r) - end - return t +local utf_16_be_getbom=patterns.utfbom_16_be^-1 +local utf_16_le_getbom=patterns.utfbom_16_le^-1 +local utf_32_be_getbom=patterns.utfbom_32_be^-1 +local utf_32_le_getbom=patterns.utfbom_32_le^-1 +local utf_16_be_linesplitter=utf_16_be_getbom*lpeg.tsplitat(patterns.utf_16_be_nl) +local utf_16_le_linesplitter=utf_16_le_getbom*lpeg.tsplitat(patterns.utf_16_le_nl) +local utf_32_be_linesplitter=utf_32_be_getbom*lpeg.tsplitat(patterns.utf_32_be_nl) +local utf_32_le_linesplitter=utf_32_le_getbom*lpeg.tsplitat(patterns.utf_32_le_nl) +local more=0 +local p_utf16_to_utf8_be=C(1)*C(1)/function(left,right) + local now=256*byte(left)+byte(right) + if more>0 then + now=(more-0xD800)*0x400+(now-0xDC00)+0x10000 + more=0 + return utfchar(now) + elseif now>=0xD800 and now<=0xDBFF then + more=now + else + return utfchar(now) + end +end +local p_utf16_to_utf8_le=C(1)*C(1)/function(right,left) + local now=256*byte(left)+byte(right) + if more>0 then + now=(more-0xD800)*0x400+(now-0xDC00)+0x10000 + more=0 + return utfchar(now) + elseif now>=0xD800 and now<=0xDBFF then + more=now + else + return utfchar(now) + end +end +local p_utf32_to_utf8_be=C(1)*C(1)*C(1)*C(1)/function(a,b,c,d) + return utfchar(256*256*256*byte(a)+256*256*byte(b)+256*byte(c)+byte(d)) +end +local p_utf32_to_utf8_le=C(1)*C(1)*C(1)*C(1)/function(a,b,c,d) + return utfchar(256*256*256*byte(d)+256*256*byte(c)+256*byte(b)+byte(a)) +end +p_utf16_to_utf8_be=P(true)/function() more=0 end*utf_16_be_getbom*Cs(p_utf16_to_utf8_be^0) +p_utf16_to_utf8_le=P(true)/function() more=0 
end*utf_16_le_getbom*Cs(p_utf16_to_utf8_le^0) +p_utf32_to_utf8_be=P(true)/function() more=0 end*utf_32_be_getbom*Cs(p_utf32_to_utf8_be^0) +p_utf32_to_utf8_le=P(true)/function() more=0 end*utf_32_le_getbom*Cs(p_utf32_to_utf8_le^0) +patterns.utf16_to_utf8_be=p_utf16_to_utf8_be +patterns.utf16_to_utf8_le=p_utf16_to_utf8_le +patterns.utf32_to_utf8_be=p_utf32_to_utf8_be +patterns.utf32_to_utf8_le=p_utf32_to_utf8_le +utf16_to_utf8_be=function(s) + if s and s~="" then + return lpegmatch(p_utf16_to_utf8_be,s) + else + return s end - utf16_to_utf8_le=function(t) - if type(t)=="string" then - t=lpegmatch(utf_16_le_linesplitter,t) - end - local result={} - for i=1,#t do - local r,more=0,0 - for left,right in bytepairs(t[i]) do - if right then - local now=256*right+left - if more>0 then - now=(more-0xD800)*0x400+(now-0xDC00)+0x10000 - more=0 - r=r+1 - result[r]=utfchar(now) - elseif now>=0xD800 and now<=0xDBFF then - more=now - else - r=r+1 - result[r]=utfchar(now) - end - end - end - t[i]=concat(result,"",1,r) - end - return t +end +utf16_to_utf8_be_t=function(t) + if not t then + return nil + elseif type(t)=="string" then + t=lpegmatch(utf_16_be_linesplitter,t) end - utf32_to_utf8_be=function(t) - if type(t)=="string" then - t=lpegmatch(utflinesplitter,t) - end - local result={} - for i=1,#t do - local r,more=0,-1 - for a,b in bytepairs(t[i]) do - if a and b then - if more<0 then - more=256*256*256*a+256*256*b - else - r=r+1 - result[t]=utfchar(more+256*a+b) - more=-1 - end - else - break - end - end - t[i]=concat(result,"",1,r) + for i=1,#t do + local s=t[i] + if s~="" then + t[i]=lpegmatch(p_utf16_to_utf8_be,s) end - return t end - utf32_to_utf8_le=function(t) - if type(t)=="string" then - t=lpegmatch(utflinesplitter,t) - end - local result={} - for i=1,#t do - local r,more=0,-1 - for a,b in bytepairs(t[i]) do - if a and b then - if more<0 then - more=256*b+a - else - r=r+1 - result[t]=utfchar(more+256*256*256*b+256*256*a) - more=-1 - end - else - break - end - end - 
t[i]=concat(result,"",1,r) - end - return t + return t +end +utf16_to_utf8_le=function(s) + if s and s~="" then + return lpegmatch(p_utf16_to_utf8_le,s) + else + return s end -else - utf16_to_utf8_be=function(t) - if type(t)=="string" then - t=lpegmatch(utf_16_be_linesplitter,t) - end - local result={} - for i=1,#t do - local r,more=0,0 - for left,right in gmatch(t[i],"(.)(.)") do - if left=="\000" then - r=r+1 - result[r]=utfchar(byte(right)) - elseif right then - local now=256*byte(left)+byte(right) - if more>0 then - now=(more-0xD800)*0x400+(now-0xDC00)+0x10000 - more=0 - r=r+1 - result[r]=utfchar(now) - elseif now>=0xD800 and now<=0xDBFF then - more=now - else - r=r+1 - result[r]=utfchar(now) - end - end - end - t[i]=concat(result,"",1,r) +end +utf16_to_utf8_le_t=function(t) + if not t then + return nil + elseif type(t)=="string" then + t=lpegmatch(utf_16_le_linesplitter,t) + end + for i=1,#t do + local s=t[i] + if s~="" then + t[i]=lpegmatch(p_utf16_to_utf8_le,s) end - return t end - utf16_to_utf8_le=function(t) - if type(t)=="string" then - t=lpegmatch(utf_16_le_linesplitter,t) + return t +end +utf32_to_utf8_be=function(s) + if s and s~="" then + return lpegmatch(p_utf32_to_utf8_be,s) + else + return s + end +end +utf32_to_utf8_be_t=function(t) + if not t then + return nil + elseif type(t)=="string" then + t=lpegmatch(utf_32_be_linesplitter,t) + end + for i=1,#t do + local s=t[i] + if s~="" then + t[i]=lpegmatch(p_utf32_to_utf8_be,s) end - local result={} - for i=1,#t do - local r,more=0,0 - for left,right in gmatch(t[i],"(.)(.)") do - if right=="\000" then - r=r+1 - result[r]=utfchar(byte(left)) - elseif right then - local now=256*byte(right)+byte(left) - if more>0 then - now=(more-0xD800)*0x400+(now-0xDC00)+0x10000 - more=0 - r=r+1 - result[r]=utfchar(now) - elseif now>=0xD800 and now<=0xDBFF then - more=now - else - r=r+1 - result[r]=utfchar(now) - end - end - end - t[i]=concat(result,"",1,r) + end + return t +end +utf32_to_utf8_le=function(s) + if s and 
s~="" then + return lpegmatch(p_utf32_to_utf8_le,s) + else + return s + end +end +utf32_to_utf8_le_t=function(t) + if not t then + return nil + elseif type(t)=="string" then + t=lpegmatch(utf_32_le_linesplitter,t) + end + for i=1,#t do + local s=t[i] + if s~="" then + t[i]=lpegmatch(p_utf32_to_utf8_le,s) end - return t end - utf32_to_utf8_le=function() return {} end - utf32_to_utf8_be=function() return {} end + return t end +utf.utf16_to_utf8_le_t=utf16_to_utf8_le_t +utf.utf16_to_utf8_be_t=utf16_to_utf8_be_t +utf.utf32_to_utf8_le_t=utf32_to_utf8_le_t +utf.utf32_to_utf8_be_t=utf32_to_utf8_be_t utf.utf16_to_utf8_le=utf16_to_utf8_le utf.utf16_to_utf8_be=utf16_to_utf8_be utf.utf32_to_utf8_le=utf32_to_utf8_le utf.utf32_to_utf8_be=utf32_to_utf8_be -function utf.utf8_to_utf8(t) +function utf.utf8_to_utf8_t(t) return type(t)=="string" and lpegmatch(utflinesplitter,t) or t end -function utf.utf16_to_utf8(t,endian) - return endian and utf16_to_utf8_be(t) or utf16_to_utf8_le(t) or t +function utf.utf16_to_utf8_t(t,endian) + return endian and utf16_to_utf8_be_t(t) or utf16_to_utf8_le_t(t) or t end -function utf.utf32_to_utf8(t,endian) - return endian and utf32_to_utf8_be(t) or utf32_to_utf8_le(t) or t +function utf.utf32_to_utf8_t(t,endian) + return endian and utf32_to_utf8_be_t(t) or utf32_to_utf8_le_t(t) or t end -local function little(c) - local b=byte(c) +local function little(b) if b<0x10000 then return char(b%256,b/256) else @@ -4893,8 +4939,7 @@ local function little(c) return char(b1%256,b1/256,b2%256,b2/256) end end -local function big(c) - local b=byte(c) +local function big(b) if b<0x10000 then return char(b/256,b%256) else @@ -4903,27 +4948,29 @@ local function big(c) return char(b1/256,b1%256,b2/256,b2%256) end end -local l_remap=utf.remapper(little,"pattern") -local b_remap=utf.remapper(big,"pattern") -function utf.utf8_to_utf16_be(str,nobom) +local l_remap=Cs((p_utf8byte/little+P(1)/"")^0) +local b_remap=Cs((p_utf8byte/big+P(1)/"")^0) +local function 
utf8_to_utf16_be(str,nobom) if nobom then return lpegmatch(b_remap,str) else return char(254,255)..lpegmatch(b_remap,str) end end -function utf.utf8_to_utf16_le(str,nobom) +local function utf8_to_utf16_le(str,nobom) if nobom then return lpegmatch(l_remap,str) else return char(255,254)..lpegmatch(l_remap,str) end end +utf.utf8_to_utf16_be=utf8_to_utf16_be +utf.utf8_to_utf16_le=utf8_to_utf16_le function utf.utf8_to_utf16(str,littleendian,nobom) if littleendian then - return utf.utf8_to_utf16_le(str,nobom) + return utf8_to_utf16_le(str,nobom) else - return utf.utf8_to_utf16_be(str,nobom) + return utf8_to_utf16_be(str,nobom) end end local pattern=Cs ( @@ -4939,16 +4986,16 @@ function utf.xstring(s) return format("0x%05X",type(s)=="number" and s or utfbyte(s)) end function utf.toeight(str) - if not str then + if not str or str=="" then return nil end local utftype=lpegmatch(p_utfstricttype,str) if utftype=="utf-8" then - return sub(str,4) - elseif utftype=="utf-16-le" then - return utf16_to_utf8_le(str) + return sub(str,4) elseif utftype=="utf-16-be" then - return utf16_to_utf8_ne(str) + return utf16_to_utf8_be(str) + elseif utftype=="utf-16-le" then + return utf16_to_utf8_le(str) else return str end @@ -8942,7 +8989,7 @@ do -- create closure to overcome 200 locals limit package.loaded["util-tpl"] = package.loaded["util-tpl"] or true --- original size: 6251, stripped down to: 3488 +-- original size: 6621, stripped down to: 3627 if not modules then modules={} end modules ['util-tpl']={ version=1.001, @@ -8984,7 +9031,7 @@ local sqlescape=lpeg.replacer { { "\r\n","\\n" }, { "\r","\\n" }, } -local sqlquoted=lpeg.Cs(lpeg.Cc("'")*sqlescape*lpeg.Cc("'")) +local sqlquoted=Cs(Cc("'")*sqlescape*Cc("'")) lpegpatterns.sqlescape=sqlescape lpegpatterns.sqlquoted=sqlquoted local luaescape=lpegpatterns.luaescape @@ -9007,12 +9054,20 @@ local quotedescapers={ local luaescaper=escapers.lua local quotedluaescaper=quotedescapers.lua local function replacekeyunquoted(s,t,how,recurse) - 
local escaper=how and escapers[how] or luaescaper - return escaper(replacekey(s,t,how,recurse)) + if how==false then + return replacekey(s,t,how,recurse) + else + local escaper=how and escapers[how] or luaescaper + return escaper(replacekey(s,t,how,recurse)) + end end local function replacekeyquoted(s,t,how,recurse) - local escaper=how and quotedescapers[how] or quotedluaescaper - return escaper(replacekey(s,t,how,recurse)) + if how==false then + return replacekey(s,t,how,recurse) + else + local escaper=how and quotedescapers[how] or quotedluaescaper + return escaper(replacekey(s,t,how,recurse)) + end end local single=P("%") local double=P("%%") @@ -17585,8 +17640,8 @@ end -- of closure -- used libraries : l-lua.lua l-package.lua l-lpeg.lua l-function.lua l-string.lua l-table.lua l-io.lua l-number.lua l-set.lua l-os.lua l-file.lua l-gzip.lua l-md5.lua l-url.lua l-dir.lua l-boolean.lua l-unicode.lua l-math.lua util-str.lua util-tab.lua util-sto.lua util-prs.lua util-fmt.lua trac-set.lua trac-log.lua trac-inf.lua trac-pro.lua util-lua.lua util-deb.lua util-mrg.lua util-tpl.lua util-env.lua luat-env.lua lxml-tab.lua lxml-lpt.lua lxml-mis.lua lxml-aux.lua lxml-xml.lua trac-xml.lua data-ini.lua data-exp.lua data-env.lua data-tmp.lua data-met.lua data-res.lua data-pre.lua data-inp.lua data-out.lua data-fil.lua data-con.lua data-use.lua data-zip.lua data-tre.lua data-sch.lua data-lua.lua data-aux.lua data-tmf.lua data-lst.lua util-lib.lua luat-sta.lua luat-fmt.lua -- skipped libraries : - --- original bytes : 724607 --- stripped bytes : 257595 +-- original bytes : 728320 +-- stripped bytes : 259616 -- end library merge diff --git a/scripts/context/stubs/mswin/mtxrun.lua b/scripts/context/stubs/mswin/mtxrun.lua index fcfdc1f17..33a8912df 100644 --- a/scripts/context/stubs/mswin/mtxrun.lua +++ b/scripts/context/stubs/mswin/mtxrun.lua @@ -437,7 +437,7 @@ do -- create closure to overcome 200 locals limit package.loaded["l-lpeg"] = package.loaded["l-lpeg"] or true --- original 
size: 32003, stripped down to: 16772 +-- original size: 33805, stripped down to: 18228 if not modules then modules={} end modules ['l-lpeg']={ version=1.001, @@ -498,6 +498,8 @@ patterns.utfbom_16_le=utfbom_16_le patterns.utfbom_8=utfbom_8 patterns.utf_16_be_nl=P("\000\r\000\n")+P("\000\r")+P("\000\n") patterns.utf_16_le_nl=P("\r\000\n\000")+P("\r\000")+P("\n\000") +patterns.utf_32_be_nl=P("\000\000\000\r\000\000\000\n")+P("\000\000\000\r")+P("\000\000\000\n") +patterns.utf_32_le_nl=P("\r\000\000\000\n\000\000\000")+P("\r\000\000\000")+P("\n\000\000\000") patterns.utf8one=R("\000\127") patterns.utf8two=R("\194\223")*utf8next patterns.utf8three=R("\224\239")*utf8next*utf8next @@ -1080,6 +1082,65 @@ local case_2=period*(digit-trailingzeros)^1*(trailingzeros/"") local number=digit^1*(case_1+case_2) local stripper=Cs((number+1)^0) lpeg.patterns.stripzeros=stripper +local byte_to_HEX={} +local byte_to_hex={} +local byte_to_dec={} +local hex_to_byte={} +for i=0,255 do + local H=format("%02X",i) + local h=format("%02x",i) + local d=format("%03i",i) + local c=char(i) + byte_to_HEX[c]=H + byte_to_hex[c]=h + byte_to_dec[c]=d + hex_to_byte[h]=c + hex_to_byte[H]=c +end +local hextobyte=P(2)/hex_to_byte +local bytetoHEX=P(1)/byte_to_HEX +local bytetohex=P(1)/byte_to_hex +local bytetodec=P(1)/byte_to_dec +local hextobytes=Cs(hextobyte^0) +local bytestoHEX=Cs(bytetoHEX^0) +local bytestohex=Cs(bytetohex^0) +local bytestodec=Cs(bytetodec^0) +patterns.hextobyte=hextobyte +patterns.bytetoHEX=bytetoHEX +patterns.bytetohex=bytetohex +patterns.bytetodec=bytetodec +patterns.hextobytes=hextobytes +patterns.bytestoHEX=bytestoHEX +patterns.bytestohex=bytestohex +patterns.bytestodec=bytestodec +function string.toHEX(s) + if not s or s=="" then + return s + else + return lpegmatch(bytestoHEX,s) + end +end +function string.tohex(s) + if not s or s=="" then + return s + else + return lpegmatch(bytestohex,s) + end +end +function string.todec(s) + if not s or s=="" then + return s + else + return 
lpegmatch(bytestodec,s) + end +end +function string.tobytes(s) + if not s or s=="" then + return s + else + return lpegmatch(hextobytes,s) + end +end end -- of closure @@ -1212,7 +1273,7 @@ do -- create closure to overcome 200 locals limit package.loaded["l-table"] = package.loaded["l-table"] or true --- original size: 33477, stripped down to: 21843 +-- original size: 33499, stripped down to: 21844 if not modules then modules={} end modules ['l-table']={ version=1.001, @@ -1259,7 +1320,7 @@ local function compare(a,b) if ta==tb then return a0 then - now=(more-0xD800)*0x400+(now-0xDC00)+0x10000 - more=0 - r=r+1 - result[r]=utfchar(now) - elseif now>=0xD800 and now<=0xDBFF then - more=now - else - r=r+1 - result[r]=utfchar(now) - end - end - end - t[i]=concat(result,"",1,r) - end - return t +local utf_16_be_getbom=patterns.utfbom_16_be^-1 +local utf_16_le_getbom=patterns.utfbom_16_le^-1 +local utf_32_be_getbom=patterns.utfbom_32_be^-1 +local utf_32_le_getbom=patterns.utfbom_32_le^-1 +local utf_16_be_linesplitter=utf_16_be_getbom*lpeg.tsplitat(patterns.utf_16_be_nl) +local utf_16_le_linesplitter=utf_16_le_getbom*lpeg.tsplitat(patterns.utf_16_le_nl) +local utf_32_be_linesplitter=utf_32_be_getbom*lpeg.tsplitat(patterns.utf_32_be_nl) +local utf_32_le_linesplitter=utf_32_le_getbom*lpeg.tsplitat(patterns.utf_32_le_nl) +local more=0 +local p_utf16_to_utf8_be=C(1)*C(1)/function(left,right) + local now=256*byte(left)+byte(right) + if more>0 then + now=(more-0xD800)*0x400+(now-0xDC00)+0x10000 + more=0 + return utfchar(now) + elseif now>=0xD800 and now<=0xDBFF then + more=now + else + return utfchar(now) + end +end +local p_utf16_to_utf8_le=C(1)*C(1)/function(right,left) + local now=256*byte(left)+byte(right) + if more>0 then + now=(more-0xD800)*0x400+(now-0xDC00)+0x10000 + more=0 + return utfchar(now) + elseif now>=0xD800 and now<=0xDBFF then + more=now + else + return utfchar(now) + end +end +local p_utf32_to_utf8_be=C(1)*C(1)*C(1)*C(1)/function(a,b,c,d) + return 
utfchar(256*256*256*byte(a)+256*256*byte(b)+256*byte(c)+byte(d)) +end +local p_utf32_to_utf8_le=C(1)*C(1)*C(1)*C(1)/function(a,b,c,d) + return utfchar(256*256*256*byte(d)+256*256*byte(c)+256*byte(b)+byte(a)) +end +p_utf16_to_utf8_be=P(true)/function() more=0 end*utf_16_be_getbom*Cs(p_utf16_to_utf8_be^0) +p_utf16_to_utf8_le=P(true)/function() more=0 end*utf_16_le_getbom*Cs(p_utf16_to_utf8_le^0) +p_utf32_to_utf8_be=P(true)/function() more=0 end*utf_32_be_getbom*Cs(p_utf32_to_utf8_be^0) +p_utf32_to_utf8_le=P(true)/function() more=0 end*utf_32_le_getbom*Cs(p_utf32_to_utf8_le^0) +patterns.utf16_to_utf8_be=p_utf16_to_utf8_be +patterns.utf16_to_utf8_le=p_utf16_to_utf8_le +patterns.utf32_to_utf8_be=p_utf32_to_utf8_be +patterns.utf32_to_utf8_le=p_utf32_to_utf8_le +utf16_to_utf8_be=function(s) + if s and s~="" then + return lpegmatch(p_utf16_to_utf8_be,s) + else + return s end - utf16_to_utf8_le=function(t) - if type(t)=="string" then - t=lpegmatch(utf_16_le_linesplitter,t) - end - local result={} - for i=1,#t do - local r,more=0,0 - for left,right in bytepairs(t[i]) do - if right then - local now=256*right+left - if more>0 then - now=(more-0xD800)*0x400+(now-0xDC00)+0x10000 - more=0 - r=r+1 - result[r]=utfchar(now) - elseif now>=0xD800 and now<=0xDBFF then - more=now - else - r=r+1 - result[r]=utfchar(now) - end - end - end - t[i]=concat(result,"",1,r) - end - return t +end +utf16_to_utf8_be_t=function(t) + if not t then + return nil + elseif type(t)=="string" then + t=lpegmatch(utf_16_be_linesplitter,t) end - utf32_to_utf8_be=function(t) - if type(t)=="string" then - t=lpegmatch(utflinesplitter,t) - end - local result={} - for i=1,#t do - local r,more=0,-1 - for a,b in bytepairs(t[i]) do - if a and b then - if more<0 then - more=256*256*256*a+256*256*b - else - r=r+1 - result[t]=utfchar(more+256*a+b) - more=-1 - end - else - break - end - end - t[i]=concat(result,"",1,r) + for i=1,#t do + local s=t[i] + if s~="" then + t[i]=lpegmatch(p_utf16_to_utf8_be,s) end - return t 
end - utf32_to_utf8_le=function(t) - if type(t)=="string" then - t=lpegmatch(utflinesplitter,t) - end - local result={} - for i=1,#t do - local r,more=0,-1 - for a,b in bytepairs(t[i]) do - if a and b then - if more<0 then - more=256*b+a - else - r=r+1 - result[t]=utfchar(more+256*256*256*b+256*256*a) - more=-1 - end - else - break - end - end - t[i]=concat(result,"",1,r) - end - return t + return t +end +utf16_to_utf8_le=function(s) + if s and s~="" then + return lpegmatch(p_utf16_to_utf8_le,s) + else + return s end -else - utf16_to_utf8_be=function(t) - if type(t)=="string" then - t=lpegmatch(utf_16_be_linesplitter,t) - end - local result={} - for i=1,#t do - local r,more=0,0 - for left,right in gmatch(t[i],"(.)(.)") do - if left=="\000" then - r=r+1 - result[r]=utfchar(byte(right)) - elseif right then - local now=256*byte(left)+byte(right) - if more>0 then - now=(more-0xD800)*0x400+(now-0xDC00)+0x10000 - more=0 - r=r+1 - result[r]=utfchar(now) - elseif now>=0xD800 and now<=0xDBFF then - more=now - else - r=r+1 - result[r]=utfchar(now) - end - end - end - t[i]=concat(result,"",1,r) +end +utf16_to_utf8_le_t=function(t) + if not t then + return nil + elseif type(t)=="string" then + t=lpegmatch(utf_16_le_linesplitter,t) + end + for i=1,#t do + local s=t[i] + if s~="" then + t[i]=lpegmatch(p_utf16_to_utf8_le,s) end - return t end - utf16_to_utf8_le=function(t) - if type(t)=="string" then - t=lpegmatch(utf_16_le_linesplitter,t) + return t +end +utf32_to_utf8_be=function(s) + if s and s~="" then + return lpegmatch(p_utf32_to_utf8_be,s) + else + return s + end +end +utf32_to_utf8_be_t=function(t) + if not t then + return nil + elseif type(t)=="string" then + t=lpegmatch(utf_32_be_linesplitter,t) + end + for i=1,#t do + local s=t[i] + if s~="" then + t[i]=lpegmatch(p_utf32_to_utf8_be,s) end - local result={} - for i=1,#t do - local r,more=0,0 - for left,right in gmatch(t[i],"(.)(.)") do - if right=="\000" then - r=r+1 - result[r]=utfchar(byte(left)) - elseif right then - 
local now=256*byte(right)+byte(left) - if more>0 then - now=(more-0xD800)*0x400+(now-0xDC00)+0x10000 - more=0 - r=r+1 - result[r]=utfchar(now) - elseif now>=0xD800 and now<=0xDBFF then - more=now - else - r=r+1 - result[r]=utfchar(now) - end - end - end - t[i]=concat(result,"",1,r) + end + return t +end +utf32_to_utf8_le=function(s) + if s and s~="" then + return lpegmatch(p_utf32_to_utf8_le,s) + else + return s + end +end +utf32_to_utf8_le_t=function(t) + if not t then + return nil + elseif type(t)=="string" then + t=lpegmatch(utf_32_le_linesplitter,t) + end + for i=1,#t do + local s=t[i] + if s~="" then + t[i]=lpegmatch(p_utf32_to_utf8_le,s) end - return t end - utf32_to_utf8_le=function() return {} end - utf32_to_utf8_be=function() return {} end + return t end +utf.utf16_to_utf8_le_t=utf16_to_utf8_le_t +utf.utf16_to_utf8_be_t=utf16_to_utf8_be_t +utf.utf32_to_utf8_le_t=utf32_to_utf8_le_t +utf.utf32_to_utf8_be_t=utf32_to_utf8_be_t utf.utf16_to_utf8_le=utf16_to_utf8_le utf.utf16_to_utf8_be=utf16_to_utf8_be utf.utf32_to_utf8_le=utf32_to_utf8_le utf.utf32_to_utf8_be=utf32_to_utf8_be -function utf.utf8_to_utf8(t) +function utf.utf8_to_utf8_t(t) return type(t)=="string" and lpegmatch(utflinesplitter,t) or t end -function utf.utf16_to_utf8(t,endian) - return endian and utf16_to_utf8_be(t) or utf16_to_utf8_le(t) or t +function utf.utf16_to_utf8_t(t,endian) + return endian and utf16_to_utf8_be_t(t) or utf16_to_utf8_le_t(t) or t end -function utf.utf32_to_utf8(t,endian) - return endian and utf32_to_utf8_be(t) or utf32_to_utf8_le(t) or t +function utf.utf32_to_utf8_t(t,endian) + return endian and utf32_to_utf8_be_t(t) or utf32_to_utf8_le_t(t) or t end -local function little(c) - local b=byte(c) +local function little(b) if b<0x10000 then return char(b%256,b/256) else @@ -4893,8 +4939,7 @@ local function little(c) return char(b1%256,b1/256,b2%256,b2/256) end end -local function big(c) - local b=byte(c) +local function big(b) if b<0x10000 then return char(b/256,b%256) else @@ 
-4903,27 +4948,29 @@ local function big(c) return char(b1/256,b1%256,b2/256,b2%256) end end -local l_remap=utf.remapper(little,"pattern") -local b_remap=utf.remapper(big,"pattern") -function utf.utf8_to_utf16_be(str,nobom) +local l_remap=Cs((p_utf8byte/little+P(1)/"")^0) +local b_remap=Cs((p_utf8byte/big+P(1)/"")^0) +local function utf8_to_utf16_be(str,nobom) if nobom then return lpegmatch(b_remap,str) else return char(254,255)..lpegmatch(b_remap,str) end end -function utf.utf8_to_utf16_le(str,nobom) +local function utf8_to_utf16_le(str,nobom) if nobom then return lpegmatch(l_remap,str) else return char(255,254)..lpegmatch(l_remap,str) end end +utf.utf8_to_utf16_be=utf8_to_utf16_be +utf.utf8_to_utf16_le=utf8_to_utf16_le function utf.utf8_to_utf16(str,littleendian,nobom) if littleendian then - return utf.utf8_to_utf16_le(str,nobom) + return utf8_to_utf16_le(str,nobom) else - return utf.utf8_to_utf16_be(str,nobom) + return utf8_to_utf16_be(str,nobom) end end local pattern=Cs ( @@ -4939,16 +4986,16 @@ function utf.xstring(s) return format("0x%05X",type(s)=="number" and s or utfbyte(s)) end function utf.toeight(str) - if not str then + if not str or str=="" then return nil end local utftype=lpegmatch(p_utfstricttype,str) if utftype=="utf-8" then - return sub(str,4) - elseif utftype=="utf-16-le" then - return utf16_to_utf8_le(str) + return sub(str,4) elseif utftype=="utf-16-be" then - return utf16_to_utf8_ne(str) + return utf16_to_utf8_be(str) + elseif utftype=="utf-16-le" then + return utf16_to_utf8_le(str) else return str end @@ -8942,7 +8989,7 @@ do -- create closure to overcome 200 locals limit package.loaded["util-tpl"] = package.loaded["util-tpl"] or true --- original size: 6251, stripped down to: 3488 +-- original size: 6621, stripped down to: 3627 if not modules then modules={} end modules ['util-tpl']={ version=1.001, @@ -8984,7 +9031,7 @@ local sqlescape=lpeg.replacer { { "\r\n","\\n" }, { "\r","\\n" }, } -local 
sqlquoted=lpeg.Cs(lpeg.Cc("'")*sqlescape*lpeg.Cc("'")) +local sqlquoted=Cs(Cc("'")*sqlescape*Cc("'")) lpegpatterns.sqlescape=sqlescape lpegpatterns.sqlquoted=sqlquoted local luaescape=lpegpatterns.luaescape @@ -9007,12 +9054,20 @@ local quotedescapers={ local luaescaper=escapers.lua local quotedluaescaper=quotedescapers.lua local function replacekeyunquoted(s,t,how,recurse) - local escaper=how and escapers[how] or luaescaper - return escaper(replacekey(s,t,how,recurse)) + if how==false then + return replacekey(s,t,how,recurse) + else + local escaper=how and escapers[how] or luaescaper + return escaper(replacekey(s,t,how,recurse)) + end end local function replacekeyquoted(s,t,how,recurse) - local escaper=how and quotedescapers[how] or quotedluaescaper - return escaper(replacekey(s,t,how,recurse)) + if how==false then + return replacekey(s,t,how,recurse) + else + local escaper=how and quotedescapers[how] or quotedluaescaper + return escaper(replacekey(s,t,how,recurse)) + end end local single=P("%") local double=P("%%") @@ -17585,8 +17640,8 @@ end -- of closure -- used libraries : l-lua.lua l-package.lua l-lpeg.lua l-function.lua l-string.lua l-table.lua l-io.lua l-number.lua l-set.lua l-os.lua l-file.lua l-gzip.lua l-md5.lua l-url.lua l-dir.lua l-boolean.lua l-unicode.lua l-math.lua util-str.lua util-tab.lua util-sto.lua util-prs.lua util-fmt.lua trac-set.lua trac-log.lua trac-inf.lua trac-pro.lua util-lua.lua util-deb.lua util-mrg.lua util-tpl.lua util-env.lua luat-env.lua lxml-tab.lua lxml-lpt.lua lxml-mis.lua lxml-aux.lua lxml-xml.lua trac-xml.lua data-ini.lua data-exp.lua data-env.lua data-tmp.lua data-met.lua data-res.lua data-pre.lua data-inp.lua data-out.lua data-fil.lua data-con.lua data-use.lua data-zip.lua data-tre.lua data-sch.lua data-lua.lua data-aux.lua data-tmf.lua data-lst.lua util-lib.lua luat-sta.lua luat-fmt.lua -- skipped libraries : - --- original bytes : 724607 --- stripped bytes : 257595 +-- original bytes : 728320 +-- stripped bytes : 259616 
-- end library merge diff --git a/scripts/context/stubs/unix/mtxrun b/scripts/context/stubs/unix/mtxrun index fcfdc1f17..33a8912df 100755 --- a/scripts/context/stubs/unix/mtxrun +++ b/scripts/context/stubs/unix/mtxrun @@ -437,7 +437,7 @@ do -- create closure to overcome 200 locals limit package.loaded["l-lpeg"] = package.loaded["l-lpeg"] or true --- original size: 32003, stripped down to: 16772 +-- original size: 33805, stripped down to: 18228 if not modules then modules={} end modules ['l-lpeg']={ version=1.001, @@ -498,6 +498,8 @@ patterns.utfbom_16_le=utfbom_16_le patterns.utfbom_8=utfbom_8 patterns.utf_16_be_nl=P("\000\r\000\n")+P("\000\r")+P("\000\n") patterns.utf_16_le_nl=P("\r\000\n\000")+P("\r\000")+P("\n\000") +patterns.utf_32_be_nl=P("\000\000\000\r\000\000\000\n")+P("\000\000\000\r")+P("\000\000\000\n") +patterns.utf_32_le_nl=P("\r\000\000\000\n\000\000\000")+P("\r\000\000\000")+P("\n\000\000\000") patterns.utf8one=R("\000\127") patterns.utf8two=R("\194\223")*utf8next patterns.utf8three=R("\224\239")*utf8next*utf8next @@ -1080,6 +1082,65 @@ local case_2=period*(digit-trailingzeros)^1*(trailingzeros/"") local number=digit^1*(case_1+case_2) local stripper=Cs((number+1)^0) lpeg.patterns.stripzeros=stripper +local byte_to_HEX={} +local byte_to_hex={} +local byte_to_dec={} +local hex_to_byte={} +for i=0,255 do + local H=format("%02X",i) + local h=format("%02x",i) + local d=format("%03i",i) + local c=char(i) + byte_to_HEX[c]=H + byte_to_hex[c]=h + byte_to_dec[c]=d + hex_to_byte[h]=c + hex_to_byte[H]=c +end +local hextobyte=P(2)/hex_to_byte +local bytetoHEX=P(1)/byte_to_HEX +local bytetohex=P(1)/byte_to_hex +local bytetodec=P(1)/byte_to_dec +local hextobytes=Cs(hextobyte^0) +local bytestoHEX=Cs(bytetoHEX^0) +local bytestohex=Cs(bytetohex^0) +local bytestodec=Cs(bytetodec^0) +patterns.hextobyte=hextobyte +patterns.bytetoHEX=bytetoHEX +patterns.bytetohex=bytetohex +patterns.bytetodec=bytetodec +patterns.hextobytes=hextobytes +patterns.bytestoHEX=bytestoHEX 
+patterns.bytestohex=bytestohex +patterns.bytestodec=bytestodec +function string.toHEX(s) + if not s or s=="" then + return s + else + return lpegmatch(bytestoHEX,s) + end +end +function string.tohex(s) + if not s or s=="" then + return s + else + return lpegmatch(bytestohex,s) + end +end +function string.todec(s) + if not s or s=="" then + return s + else + return lpegmatch(bytestodec,s) + end +end +function string.tobytes(s) + if not s or s=="" then + return s + else + return lpegmatch(hextobytes,s) + end +end end -- of closure @@ -1212,7 +1273,7 @@ do -- create closure to overcome 200 locals limit package.loaded["l-table"] = package.loaded["l-table"] or true --- original size: 33477, stripped down to: 21843 +-- original size: 33499, stripped down to: 21844 if not modules then modules={} end modules ['l-table']={ version=1.001, @@ -1259,7 +1320,7 @@ local function compare(a,b) if ta==tb then return a0 then - now=(more-0xD800)*0x400+(now-0xDC00)+0x10000 - more=0 - r=r+1 - result[r]=utfchar(now) - elseif now>=0xD800 and now<=0xDBFF then - more=now - else - r=r+1 - result[r]=utfchar(now) - end - end - end - t[i]=concat(result,"",1,r) - end - return t +local utf_16_be_getbom=patterns.utfbom_16_be^-1 +local utf_16_le_getbom=patterns.utfbom_16_le^-1 +local utf_32_be_getbom=patterns.utfbom_32_be^-1 +local utf_32_le_getbom=patterns.utfbom_32_le^-1 +local utf_16_be_linesplitter=utf_16_be_getbom*lpeg.tsplitat(patterns.utf_16_be_nl) +local utf_16_le_linesplitter=utf_16_le_getbom*lpeg.tsplitat(patterns.utf_16_le_nl) +local utf_32_be_linesplitter=utf_32_be_getbom*lpeg.tsplitat(patterns.utf_32_be_nl) +local utf_32_le_linesplitter=utf_32_le_getbom*lpeg.tsplitat(patterns.utf_32_le_nl) +local more=0 +local p_utf16_to_utf8_be=C(1)*C(1)/function(left,right) + local now=256*byte(left)+byte(right) + if more>0 then + now=(more-0xD800)*0x400+(now-0xDC00)+0x10000 + more=0 + return utfchar(now) + elseif now>=0xD800 and now<=0xDBFF then + more=now + else + return utfchar(now) + end +end 
+local p_utf16_to_utf8_le=C(1)*C(1)/function(right,left) + local now=256*byte(left)+byte(right) + if more>0 then + now=(more-0xD800)*0x400+(now-0xDC00)+0x10000 + more=0 + return utfchar(now) + elseif now>=0xD800 and now<=0xDBFF then + more=now + else + return utfchar(now) + end +end +local p_utf32_to_utf8_be=C(1)*C(1)*C(1)*C(1)/function(a,b,c,d) + return utfchar(256*256*256*byte(a)+256*256*byte(b)+256*byte(c)+byte(d)) +end +local p_utf32_to_utf8_le=C(1)*C(1)*C(1)*C(1)/function(a,b,c,d) + return utfchar(256*256*256*byte(d)+256*256*byte(c)+256*byte(b)+byte(a)) +end +p_utf16_to_utf8_be=P(true)/function() more=0 end*utf_16_be_getbom*Cs(p_utf16_to_utf8_be^0) +p_utf16_to_utf8_le=P(true)/function() more=0 end*utf_16_le_getbom*Cs(p_utf16_to_utf8_le^0) +p_utf32_to_utf8_be=P(true)/function() more=0 end*utf_32_be_getbom*Cs(p_utf32_to_utf8_be^0) +p_utf32_to_utf8_le=P(true)/function() more=0 end*utf_32_le_getbom*Cs(p_utf32_to_utf8_le^0) +patterns.utf16_to_utf8_be=p_utf16_to_utf8_be +patterns.utf16_to_utf8_le=p_utf16_to_utf8_le +patterns.utf32_to_utf8_be=p_utf32_to_utf8_be +patterns.utf32_to_utf8_le=p_utf32_to_utf8_le +utf16_to_utf8_be=function(s) + if s and s~="" then + return lpegmatch(p_utf16_to_utf8_be,s) + else + return s end - utf16_to_utf8_le=function(t) - if type(t)=="string" then - t=lpegmatch(utf_16_le_linesplitter,t) - end - local result={} - for i=1,#t do - local r,more=0,0 - for left,right in bytepairs(t[i]) do - if right then - local now=256*right+left - if more>0 then - now=(more-0xD800)*0x400+(now-0xDC00)+0x10000 - more=0 - r=r+1 - result[r]=utfchar(now) - elseif now>=0xD800 and now<=0xDBFF then - more=now - else - r=r+1 - result[r]=utfchar(now) - end - end - end - t[i]=concat(result,"",1,r) - end - return t +end +utf16_to_utf8_be_t=function(t) + if not t then + return nil + elseif type(t)=="string" then + t=lpegmatch(utf_16_be_linesplitter,t) end - utf32_to_utf8_be=function(t) - if type(t)=="string" then - t=lpegmatch(utflinesplitter,t) - end - local result={} - 
for i=1,#t do - local r,more=0,-1 - for a,b in bytepairs(t[i]) do - if a and b then - if more<0 then - more=256*256*256*a+256*256*b - else - r=r+1 - result[t]=utfchar(more+256*a+b) - more=-1 - end - else - break - end - end - t[i]=concat(result,"",1,r) + for i=1,#t do + local s=t[i] + if s~="" then + t[i]=lpegmatch(p_utf16_to_utf8_be,s) end - return t end - utf32_to_utf8_le=function(t) - if type(t)=="string" then - t=lpegmatch(utflinesplitter,t) - end - local result={} - for i=1,#t do - local r,more=0,-1 - for a,b in bytepairs(t[i]) do - if a and b then - if more<0 then - more=256*b+a - else - r=r+1 - result[t]=utfchar(more+256*256*256*b+256*256*a) - more=-1 - end - else - break - end - end - t[i]=concat(result,"",1,r) - end - return t + return t +end +utf16_to_utf8_le=function(s) + if s and s~="" then + return lpegmatch(p_utf16_to_utf8_le,s) + else + return s end -else - utf16_to_utf8_be=function(t) - if type(t)=="string" then - t=lpegmatch(utf_16_be_linesplitter,t) - end - local result={} - for i=1,#t do - local r,more=0,0 - for left,right in gmatch(t[i],"(.)(.)") do - if left=="\000" then - r=r+1 - result[r]=utfchar(byte(right)) - elseif right then - local now=256*byte(left)+byte(right) - if more>0 then - now=(more-0xD800)*0x400+(now-0xDC00)+0x10000 - more=0 - r=r+1 - result[r]=utfchar(now) - elseif now>=0xD800 and now<=0xDBFF then - more=now - else - r=r+1 - result[r]=utfchar(now) - end - end - end - t[i]=concat(result,"",1,r) +end +utf16_to_utf8_le_t=function(t) + if not t then + return nil + elseif type(t)=="string" then + t=lpegmatch(utf_16_le_linesplitter,t) + end + for i=1,#t do + local s=t[i] + if s~="" then + t[i]=lpegmatch(p_utf16_to_utf8_le,s) end - return t end - utf16_to_utf8_le=function(t) - if type(t)=="string" then - t=lpegmatch(utf_16_le_linesplitter,t) + return t +end +utf32_to_utf8_be=function(s) + if s and s~="" then + return lpegmatch(p_utf32_to_utf8_be,s) + else + return s + end +end +utf32_to_utf8_be_t=function(t) + if not t then + return 
nil + elseif type(t)=="string" then + t=lpegmatch(utf_32_be_linesplitter,t) + end + for i=1,#t do + local s=t[i] + if s~="" then + t[i]=lpegmatch(p_utf32_to_utf8_be,s) end - local result={} - for i=1,#t do - local r,more=0,0 - for left,right in gmatch(t[i],"(.)(.)") do - if right=="\000" then - r=r+1 - result[r]=utfchar(byte(left)) - elseif right then - local now=256*byte(right)+byte(left) - if more>0 then - now=(more-0xD800)*0x400+(now-0xDC00)+0x10000 - more=0 - r=r+1 - result[r]=utfchar(now) - elseif now>=0xD800 and now<=0xDBFF then - more=now - else - r=r+1 - result[r]=utfchar(now) - end - end - end - t[i]=concat(result,"",1,r) + end + return t +end +utf32_to_utf8_le=function(s) + if s and s~="" then + return lpegmatch(p_utf32_to_utf8_le,s) + else + return s + end +end +utf32_to_utf8_le_t=function(t) + if not t then + return nil + elseif type(t)=="string" then + t=lpegmatch(utf_32_le_linesplitter,t) + end + for i=1,#t do + local s=t[i] + if s~="" then + t[i]=lpegmatch(p_utf32_to_utf8_le,s) end - return t end - utf32_to_utf8_le=function() return {} end - utf32_to_utf8_be=function() return {} end + return t end +utf.utf16_to_utf8_le_t=utf16_to_utf8_le_t +utf.utf16_to_utf8_be_t=utf16_to_utf8_be_t +utf.utf32_to_utf8_le_t=utf32_to_utf8_le_t +utf.utf32_to_utf8_be_t=utf32_to_utf8_be_t utf.utf16_to_utf8_le=utf16_to_utf8_le utf.utf16_to_utf8_be=utf16_to_utf8_be utf.utf32_to_utf8_le=utf32_to_utf8_le utf.utf32_to_utf8_be=utf32_to_utf8_be -function utf.utf8_to_utf8(t) +function utf.utf8_to_utf8_t(t) return type(t)=="string" and lpegmatch(utflinesplitter,t) or t end -function utf.utf16_to_utf8(t,endian) - return endian and utf16_to_utf8_be(t) or utf16_to_utf8_le(t) or t +function utf.utf16_to_utf8_t(t,endian) + return endian and utf16_to_utf8_be_t(t) or utf16_to_utf8_le_t(t) or t end -function utf.utf32_to_utf8(t,endian) - return endian and utf32_to_utf8_be(t) or utf32_to_utf8_le(t) or t +function utf.utf32_to_utf8_t(t,endian) + return endian and utf32_to_utf8_be_t(t) or 
utf32_to_utf8_le_t(t) or t end -local function little(c) - local b=byte(c) +local function little(b) if b<0x10000 then return char(b%256,b/256) else @@ -4893,8 +4939,7 @@ local function little(c) return char(b1%256,b1/256,b2%256,b2/256) end end -local function big(c) - local b=byte(c) +local function big(b) if b<0x10000 then return char(b/256,b%256) else @@ -4903,27 +4948,29 @@ local function big(c) return char(b1/256,b1%256,b2/256,b2%256) end end -local l_remap=utf.remapper(little,"pattern") -local b_remap=utf.remapper(big,"pattern") -function utf.utf8_to_utf16_be(str,nobom) +local l_remap=Cs((p_utf8byte/little+P(1)/"")^0) +local b_remap=Cs((p_utf8byte/big+P(1)/"")^0) +local function utf8_to_utf16_be(str,nobom) if nobom then return lpegmatch(b_remap,str) else return char(254,255)..lpegmatch(b_remap,str) end end -function utf.utf8_to_utf16_le(str,nobom) +local function utf8_to_utf16_le(str,nobom) if nobom then return lpegmatch(l_remap,str) else return char(255,254)..lpegmatch(l_remap,str) end end +utf.utf8_to_utf16_be=utf8_to_utf16_be +utf.utf8_to_utf16_le=utf8_to_utf16_le function utf.utf8_to_utf16(str,littleendian,nobom) if littleendian then - return utf.utf8_to_utf16_le(str,nobom) + return utf8_to_utf16_le(str,nobom) else - return utf.utf8_to_utf16_be(str,nobom) + return utf8_to_utf16_be(str,nobom) end end local pattern=Cs ( @@ -4939,16 +4986,16 @@ function utf.xstring(s) return format("0x%05X",type(s)=="number" and s or utfbyte(s)) end function utf.toeight(str) - if not str then + if not str or str=="" then return nil end local utftype=lpegmatch(p_utfstricttype,str) if utftype=="utf-8" then - return sub(str,4) - elseif utftype=="utf-16-le" then - return utf16_to_utf8_le(str) + return sub(str,4) elseif utftype=="utf-16-be" then - return utf16_to_utf8_ne(str) + return utf16_to_utf8_be(str) + elseif utftype=="utf-16-le" then + return utf16_to_utf8_le(str) else return str end @@ -8942,7 +8989,7 @@ do -- create closure to overcome 200 locals limit 
package.loaded["util-tpl"] = package.loaded["util-tpl"] or true --- original size: 6251, stripped down to: 3488 +-- original size: 6621, stripped down to: 3627 if not modules then modules={} end modules ['util-tpl']={ version=1.001, @@ -8984,7 +9031,7 @@ local sqlescape=lpeg.replacer { { "\r\n","\\n" }, { "\r","\\n" }, } -local sqlquoted=lpeg.Cs(lpeg.Cc("'")*sqlescape*lpeg.Cc("'")) +local sqlquoted=Cs(Cc("'")*sqlescape*Cc("'")) lpegpatterns.sqlescape=sqlescape lpegpatterns.sqlquoted=sqlquoted local luaescape=lpegpatterns.luaescape @@ -9007,12 +9054,20 @@ local quotedescapers={ local luaescaper=escapers.lua local quotedluaescaper=quotedescapers.lua local function replacekeyunquoted(s,t,how,recurse) - local escaper=how and escapers[how] or luaescaper - return escaper(replacekey(s,t,how,recurse)) + if how==false then + return replacekey(s,t,how,recurse) + else + local escaper=how and escapers[how] or luaescaper + return escaper(replacekey(s,t,how,recurse)) + end end local function replacekeyquoted(s,t,how,recurse) - local escaper=how and quotedescapers[how] or quotedluaescaper - return escaper(replacekey(s,t,how,recurse)) + if how==false then + return replacekey(s,t,how,recurse) + else + local escaper=how and quotedescapers[how] or quotedluaescaper + return escaper(replacekey(s,t,how,recurse)) + end end local single=P("%") local double=P("%%") @@ -17585,8 +17640,8 @@ end -- of closure -- used libraries : l-lua.lua l-package.lua l-lpeg.lua l-function.lua l-string.lua l-table.lua l-io.lua l-number.lua l-set.lua l-os.lua l-file.lua l-gzip.lua l-md5.lua l-url.lua l-dir.lua l-boolean.lua l-unicode.lua l-math.lua util-str.lua util-tab.lua util-sto.lua util-prs.lua util-fmt.lua trac-set.lua trac-log.lua trac-inf.lua trac-pro.lua util-lua.lua util-deb.lua util-mrg.lua util-tpl.lua util-env.lua luat-env.lua lxml-tab.lua lxml-lpt.lua lxml-mis.lua lxml-aux.lua lxml-xml.lua trac-xml.lua data-ini.lua data-exp.lua data-env.lua data-tmp.lua data-met.lua data-res.lua data-pre.lua 
data-inp.lua data-out.lua data-fil.lua data-con.lua data-use.lua data-zip.lua data-tre.lua data-sch.lua data-lua.lua data-aux.lua data-tmf.lua data-lst.lua util-lib.lua luat-sta.lua luat-fmt.lua -- skipped libraries : - --- original bytes : 724607 --- stripped bytes : 257595 +-- original bytes : 728320 +-- stripped bytes : 259616 -- end library merge diff --git a/scripts/context/stubs/win64/mtxrun.lua b/scripts/context/stubs/win64/mtxrun.lua index fcfdc1f17..33a8912df 100644 --- a/scripts/context/stubs/win64/mtxrun.lua +++ b/scripts/context/stubs/win64/mtxrun.lua @@ -437,7 +437,7 @@ do -- create closure to overcome 200 locals limit package.loaded["l-lpeg"] = package.loaded["l-lpeg"] or true --- original size: 32003, stripped down to: 16772 +-- original size: 33805, stripped down to: 18228 if not modules then modules={} end modules ['l-lpeg']={ version=1.001, @@ -498,6 +498,8 @@ patterns.utfbom_16_le=utfbom_16_le patterns.utfbom_8=utfbom_8 patterns.utf_16_be_nl=P("\000\r\000\n")+P("\000\r")+P("\000\n") patterns.utf_16_le_nl=P("\r\000\n\000")+P("\r\000")+P("\n\000") +patterns.utf_32_be_nl=P("\000\000\000\r\000\000\000\n")+P("\000\000\000\r")+P("\000\000\000\n") +patterns.utf_32_le_nl=P("\r\000\000\000\n\000\000\000")+P("\r\000\000\000")+P("\n\000\000\000") patterns.utf8one=R("\000\127") patterns.utf8two=R("\194\223")*utf8next patterns.utf8three=R("\224\239")*utf8next*utf8next @@ -1080,6 +1082,65 @@ local case_2=period*(digit-trailingzeros)^1*(trailingzeros/"") local number=digit^1*(case_1+case_2) local stripper=Cs((number+1)^0) lpeg.patterns.stripzeros=stripper +local byte_to_HEX={} +local byte_to_hex={} +local byte_to_dec={} +local hex_to_byte={} +for i=0,255 do + local H=format("%02X",i) + local h=format("%02x",i) + local d=format("%03i",i) + local c=char(i) + byte_to_HEX[c]=H + byte_to_hex[c]=h + byte_to_dec[c]=d + hex_to_byte[h]=c + hex_to_byte[H]=c +end +local hextobyte=P(2)/hex_to_byte +local bytetoHEX=P(1)/byte_to_HEX +local bytetohex=P(1)/byte_to_hex +local 
bytetodec=P(1)/byte_to_dec +local hextobytes=Cs(hextobyte^0) +local bytestoHEX=Cs(bytetoHEX^0) +local bytestohex=Cs(bytetohex^0) +local bytestodec=Cs(bytetodec^0) +patterns.hextobyte=hextobyte +patterns.bytetoHEX=bytetoHEX +patterns.bytetohex=bytetohex +patterns.bytetodec=bytetodec +patterns.hextobytes=hextobytes +patterns.bytestoHEX=bytestoHEX +patterns.bytestohex=bytestohex +patterns.bytestodec=bytestodec +function string.toHEX(s) + if not s or s=="" then + return s + else + return lpegmatch(bytestoHEX,s) + end +end +function string.tohex(s) + if not s or s=="" then + return s + else + return lpegmatch(bytestohex,s) + end +end +function string.todec(s) + if not s or s=="" then + return s + else + return lpegmatch(bytestodec,s) + end +end +function string.tobytes(s) + if not s or s=="" then + return s + else + return lpegmatch(hextobytes,s) + end +end end -- of closure @@ -1212,7 +1273,7 @@ do -- create closure to overcome 200 locals limit package.loaded["l-table"] = package.loaded["l-table"] or true --- original size: 33477, stripped down to: 21843 +-- original size: 33499, stripped down to: 21844 if not modules then modules={} end modules ['l-table']={ version=1.001, @@ -1259,7 +1320,7 @@ local function compare(a,b) if ta==tb then return a0 then - now=(more-0xD800)*0x400+(now-0xDC00)+0x10000 - more=0 - r=r+1 - result[r]=utfchar(now) - elseif now>=0xD800 and now<=0xDBFF then - more=now - else - r=r+1 - result[r]=utfchar(now) - end - end - end - t[i]=concat(result,"",1,r) - end - return t +local utf_16_be_getbom=patterns.utfbom_16_be^-1 +local utf_16_le_getbom=patterns.utfbom_16_le^-1 +local utf_32_be_getbom=patterns.utfbom_32_be^-1 +local utf_32_le_getbom=patterns.utfbom_32_le^-1 +local utf_16_be_linesplitter=utf_16_be_getbom*lpeg.tsplitat(patterns.utf_16_be_nl) +local utf_16_le_linesplitter=utf_16_le_getbom*lpeg.tsplitat(patterns.utf_16_le_nl) +local utf_32_be_linesplitter=utf_32_be_getbom*lpeg.tsplitat(patterns.utf_32_be_nl) +local 
utf_32_le_linesplitter=utf_32_le_getbom*lpeg.tsplitat(patterns.utf_32_le_nl) +local more=0 +local p_utf16_to_utf8_be=C(1)*C(1)/function(left,right) + local now=256*byte(left)+byte(right) + if more>0 then + now=(more-0xD800)*0x400+(now-0xDC00)+0x10000 + more=0 + return utfchar(now) + elseif now>=0xD800 and now<=0xDBFF then + more=now + else + return utfchar(now) + end +end +local p_utf16_to_utf8_le=C(1)*C(1)/function(right,left) + local now=256*byte(left)+byte(right) + if more>0 then + now=(more-0xD800)*0x400+(now-0xDC00)+0x10000 + more=0 + return utfchar(now) + elseif now>=0xD800 and now<=0xDBFF then + more=now + else + return utfchar(now) + end +end +local p_utf32_to_utf8_be=C(1)*C(1)*C(1)*C(1)/function(a,b,c,d) + return utfchar(256*256*256*byte(a)+256*256*byte(b)+256*byte(c)+byte(d)) +end +local p_utf32_to_utf8_le=C(1)*C(1)*C(1)*C(1)/function(a,b,c,d) + return utfchar(256*256*256*byte(d)+256*256*byte(c)+256*byte(b)+byte(a)) +end +p_utf16_to_utf8_be=P(true)/function() more=0 end*utf_16_be_getbom*Cs(p_utf16_to_utf8_be^0) +p_utf16_to_utf8_le=P(true)/function() more=0 end*utf_16_le_getbom*Cs(p_utf16_to_utf8_le^0) +p_utf32_to_utf8_be=P(true)/function() more=0 end*utf_32_be_getbom*Cs(p_utf32_to_utf8_be^0) +p_utf32_to_utf8_le=P(true)/function() more=0 end*utf_32_le_getbom*Cs(p_utf32_to_utf8_le^0) +patterns.utf16_to_utf8_be=p_utf16_to_utf8_be +patterns.utf16_to_utf8_le=p_utf16_to_utf8_le +patterns.utf32_to_utf8_be=p_utf32_to_utf8_be +patterns.utf32_to_utf8_le=p_utf32_to_utf8_le +utf16_to_utf8_be=function(s) + if s and s~="" then + return lpegmatch(p_utf16_to_utf8_be,s) + else + return s end - utf16_to_utf8_le=function(t) - if type(t)=="string" then - t=lpegmatch(utf_16_le_linesplitter,t) - end - local result={} - for i=1,#t do - local r,more=0,0 - for left,right in bytepairs(t[i]) do - if right then - local now=256*right+left - if more>0 then - now=(more-0xD800)*0x400+(now-0xDC00)+0x10000 - more=0 - r=r+1 - result[r]=utfchar(now) - elseif now>=0xD800 and now<=0xDBFF then 
- more=now - else - r=r+1 - result[r]=utfchar(now) - end - end - end - t[i]=concat(result,"",1,r) - end - return t +end +utf16_to_utf8_be_t=function(t) + if not t then + return nil + elseif type(t)=="string" then + t=lpegmatch(utf_16_be_linesplitter,t) end - utf32_to_utf8_be=function(t) - if type(t)=="string" then - t=lpegmatch(utflinesplitter,t) - end - local result={} - for i=1,#t do - local r,more=0,-1 - for a,b in bytepairs(t[i]) do - if a and b then - if more<0 then - more=256*256*256*a+256*256*b - else - r=r+1 - result[t]=utfchar(more+256*a+b) - more=-1 - end - else - break - end - end - t[i]=concat(result,"",1,r) + for i=1,#t do + local s=t[i] + if s~="" then + t[i]=lpegmatch(p_utf16_to_utf8_be,s) end - return t end - utf32_to_utf8_le=function(t) - if type(t)=="string" then - t=lpegmatch(utflinesplitter,t) - end - local result={} - for i=1,#t do - local r,more=0,-1 - for a,b in bytepairs(t[i]) do - if a and b then - if more<0 then - more=256*b+a - else - r=r+1 - result[t]=utfchar(more+256*256*256*b+256*256*a) - more=-1 - end - else - break - end - end - t[i]=concat(result,"",1,r) - end - return t + return t +end +utf16_to_utf8_le=function(s) + if s and s~="" then + return lpegmatch(p_utf16_to_utf8_le,s) + else + return s end -else - utf16_to_utf8_be=function(t) - if type(t)=="string" then - t=lpegmatch(utf_16_be_linesplitter,t) - end - local result={} - for i=1,#t do - local r,more=0,0 - for left,right in gmatch(t[i],"(.)(.)") do - if left=="\000" then - r=r+1 - result[r]=utfchar(byte(right)) - elseif right then - local now=256*byte(left)+byte(right) - if more>0 then - now=(more-0xD800)*0x400+(now-0xDC00)+0x10000 - more=0 - r=r+1 - result[r]=utfchar(now) - elseif now>=0xD800 and now<=0xDBFF then - more=now - else - r=r+1 - result[r]=utfchar(now) - end - end - end - t[i]=concat(result,"",1,r) +end +utf16_to_utf8_le_t=function(t) + if not t then + return nil + elseif type(t)=="string" then + t=lpegmatch(utf_16_le_linesplitter,t) + end + for i=1,#t do + local 
s=t[i] + if s~="" then + t[i]=lpegmatch(p_utf16_to_utf8_le,s) end - return t end - utf16_to_utf8_le=function(t) - if type(t)=="string" then - t=lpegmatch(utf_16_le_linesplitter,t) + return t +end +utf32_to_utf8_be=function(s) + if s and s~="" then + return lpegmatch(p_utf32_to_utf8_be,s) + else + return s + end +end +utf32_to_utf8_be_t=function(t) + if not t then + return nil + elseif type(t)=="string" then + t=lpegmatch(utf_32_be_linesplitter,t) + end + for i=1,#t do + local s=t[i] + if s~="" then + t[i]=lpegmatch(p_utf32_to_utf8_be,s) end - local result={} - for i=1,#t do - local r,more=0,0 - for left,right in gmatch(t[i],"(.)(.)") do - if right=="\000" then - r=r+1 - result[r]=utfchar(byte(left)) - elseif right then - local now=256*byte(right)+byte(left) - if more>0 then - now=(more-0xD800)*0x400+(now-0xDC00)+0x10000 - more=0 - r=r+1 - result[r]=utfchar(now) - elseif now>=0xD800 and now<=0xDBFF then - more=now - else - r=r+1 - result[r]=utfchar(now) - end - end - end - t[i]=concat(result,"",1,r) + end + return t +end +utf32_to_utf8_le=function(s) + if s and s~="" then + return lpegmatch(p_utf32_to_utf8_le,s) + else + return s + end +end +utf32_to_utf8_le_t=function(t) + if not t then + return nil + elseif type(t)=="string" then + t=lpegmatch(utf_32_le_linesplitter,t) + end + for i=1,#t do + local s=t[i] + if s~="" then + t[i]=lpegmatch(p_utf32_to_utf8_le,s) end - return t end - utf32_to_utf8_le=function() return {} end - utf32_to_utf8_be=function() return {} end + return t end +utf.utf16_to_utf8_le_t=utf16_to_utf8_le_t +utf.utf16_to_utf8_be_t=utf16_to_utf8_be_t +utf.utf32_to_utf8_le_t=utf32_to_utf8_le_t +utf.utf32_to_utf8_be_t=utf32_to_utf8_be_t utf.utf16_to_utf8_le=utf16_to_utf8_le utf.utf16_to_utf8_be=utf16_to_utf8_be utf.utf32_to_utf8_le=utf32_to_utf8_le utf.utf32_to_utf8_be=utf32_to_utf8_be -function utf.utf8_to_utf8(t) +function utf.utf8_to_utf8_t(t) return type(t)=="string" and lpegmatch(utflinesplitter,t) or t end -function utf.utf16_to_utf8(t,endian) - 
return endian and utf16_to_utf8_be(t) or utf16_to_utf8_le(t) or t +function utf.utf16_to_utf8_t(t,endian) + return endian and utf16_to_utf8_be_t(t) or utf16_to_utf8_le_t(t) or t end -function utf.utf32_to_utf8(t,endian) - return endian and utf32_to_utf8_be(t) or utf32_to_utf8_le(t) or t +function utf.utf32_to_utf8_t(t,endian) + return endian and utf32_to_utf8_be_t(t) or utf32_to_utf8_le_t(t) or t end -local function little(c) - local b=byte(c) +local function little(b) if b<0x10000 then return char(b%256,b/256) else @@ -4893,8 +4939,7 @@ local function little(c) return char(b1%256,b1/256,b2%256,b2/256) end end -local function big(c) - local b=byte(c) +local function big(b) if b<0x10000 then return char(b/256,b%256) else @@ -4903,27 +4948,29 @@ local function big(c) return char(b1/256,b1%256,b2/256,b2%256) end end -local l_remap=utf.remapper(little,"pattern") -local b_remap=utf.remapper(big,"pattern") -function utf.utf8_to_utf16_be(str,nobom) +local l_remap=Cs((p_utf8byte/little+P(1)/"")^0) +local b_remap=Cs((p_utf8byte/big+P(1)/"")^0) +local function utf8_to_utf16_be(str,nobom) if nobom then return lpegmatch(b_remap,str) else return char(254,255)..lpegmatch(b_remap,str) end end -function utf.utf8_to_utf16_le(str,nobom) +local function utf8_to_utf16_le(str,nobom) if nobom then return lpegmatch(l_remap,str) else return char(255,254)..lpegmatch(l_remap,str) end end +utf.utf8_to_utf16_be=utf8_to_utf16_be +utf.utf8_to_utf16_le=utf8_to_utf16_le function utf.utf8_to_utf16(str,littleendian,nobom) if littleendian then - return utf.utf8_to_utf16_le(str,nobom) + return utf8_to_utf16_le(str,nobom) else - return utf.utf8_to_utf16_be(str,nobom) + return utf8_to_utf16_be(str,nobom) end end local pattern=Cs ( @@ -4939,16 +4986,16 @@ function utf.xstring(s) return format("0x%05X",type(s)=="number" and s or utfbyte(s)) end function utf.toeight(str) - if not str then + if not str or str=="" then return nil end local utftype=lpegmatch(p_utfstricttype,str) if utftype=="utf-8" then - 
return sub(str,4) - elseif utftype=="utf-16-le" then - return utf16_to_utf8_le(str) + return sub(str,4) elseif utftype=="utf-16-be" then - return utf16_to_utf8_ne(str) + return utf16_to_utf8_be(str) + elseif utftype=="utf-16-le" then + return utf16_to_utf8_le(str) else return str end @@ -8942,7 +8989,7 @@ do -- create closure to overcome 200 locals limit package.loaded["util-tpl"] = package.loaded["util-tpl"] or true --- original size: 6251, stripped down to: 3488 +-- original size: 6621, stripped down to: 3627 if not modules then modules={} end modules ['util-tpl']={ version=1.001, @@ -8984,7 +9031,7 @@ local sqlescape=lpeg.replacer { { "\r\n","\\n" }, { "\r","\\n" }, } -local sqlquoted=lpeg.Cs(lpeg.Cc("'")*sqlescape*lpeg.Cc("'")) +local sqlquoted=Cs(Cc("'")*sqlescape*Cc("'")) lpegpatterns.sqlescape=sqlescape lpegpatterns.sqlquoted=sqlquoted local luaescape=lpegpatterns.luaescape @@ -9007,12 +9054,20 @@ local quotedescapers={ local luaescaper=escapers.lua local quotedluaescaper=quotedescapers.lua local function replacekeyunquoted(s,t,how,recurse) - local escaper=how and escapers[how] or luaescaper - return escaper(replacekey(s,t,how,recurse)) + if how==false then + return replacekey(s,t,how,recurse) + else + local escaper=how and escapers[how] or luaescaper + return escaper(replacekey(s,t,how,recurse)) + end end local function replacekeyquoted(s,t,how,recurse) - local escaper=how and quotedescapers[how] or quotedluaescaper - return escaper(replacekey(s,t,how,recurse)) + if how==false then + return replacekey(s,t,how,recurse) + else + local escaper=how and quotedescapers[how] or quotedluaescaper + return escaper(replacekey(s,t,how,recurse)) + end end local single=P("%") local double=P("%%") @@ -17585,8 +17640,8 @@ end -- of closure -- used libraries : l-lua.lua l-package.lua l-lpeg.lua l-function.lua l-string.lua l-table.lua l-io.lua l-number.lua l-set.lua l-os.lua l-file.lua l-gzip.lua l-md5.lua l-url.lua l-dir.lua l-boolean.lua l-unicode.lua l-math.lua 
util-str.lua util-tab.lua util-sto.lua util-prs.lua util-fmt.lua trac-set.lua trac-log.lua trac-inf.lua trac-pro.lua util-lua.lua util-deb.lua util-mrg.lua util-tpl.lua util-env.lua luat-env.lua lxml-tab.lua lxml-lpt.lua lxml-mis.lua lxml-aux.lua lxml-xml.lua trac-xml.lua data-ini.lua data-exp.lua data-env.lua data-tmp.lua data-met.lua data-res.lua data-pre.lua data-inp.lua data-out.lua data-fil.lua data-con.lua data-use.lua data-zip.lua data-tre.lua data-sch.lua data-lua.lua data-aux.lua data-tmf.lua data-lst.lua util-lib.lua luat-sta.lua luat-fmt.lua -- skipped libraries : - --- original bytes : 724607 --- stripped bytes : 257595 +-- original bytes : 728320 +-- stripped bytes : 259616 -- end library merge diff --git a/tex/context/base/attr-lay.lua b/tex/context/base/attr-lay.lua index 176af1a2c..e7d532237 100644 --- a/tex/context/base/attr-lay.lua +++ b/tex/context/base/attr-lay.lua @@ -80,6 +80,7 @@ local layerstacker = utilities.stacker.new("layers") -- experiment layerstacker.mode = "stack" layerstacker.unset = attributes.unsetvalue +viewerlayers.resolve_reset = layerstacker.resolve_reset viewerlayers.resolve_begin = layerstacker.resolve_begin viewerlayers.resolve_step = layerstacker.resolve_step viewerlayers.resolve_end = layerstacker.resolve_end diff --git a/tex/context/base/back-exp.lua b/tex/context/base/back-exp.lua index e64b7b77c..5fa765b79 100644 --- a/tex/context/base/back-exp.lua +++ b/tex/context/base/back-exp.lua @@ -38,7 +38,6 @@ local validstring = string.valid local lpegmatch = lpeg.match local utfchar, utfvalues = utf.char, utf.values local insert, remove = table.insert, table.remove -local fromunicode16 = fonts.mappings.fromunicode16 local sortedhash = table.sortedhash local formatters = string.formatters local todimen = number.todimen @@ -2507,13 +2506,18 @@ or pap if fc then fc = fc and fc[c] if fc then - local u = fc.tounicode - if u and u ~= "" then + local u = fc.unicode + if not u then nofcurrentcontent = nofcurrentcontent + 1 - 
currentcontent[nofcurrentcontent] = utfchar(fromunicode16(u)) + currentcontent[nofcurrentcontent] = utfchar(c) + elseif type(u) == "table" then + for i=1,#u do + nofcurrentcontent = nofcurrentcontent + 1 + currentcontent[nofcurrentcontent] = utfchar(u[i]) + end else nofcurrentcontent = nofcurrentcontent + 1 - currentcontent[nofcurrentcontent] = utfchar(c) + currentcontent[nofcurrentcontent] = utfchar(u) end else -- weird, happens in hz (we really need to get rid of the pseudo fonts) nofcurrentcontent = nofcurrentcontent + 1 @@ -3092,7 +3096,7 @@ end end end - local cssfile = nil directives.register("backend.export.css", function(v) cssfile = v end) + -- local cssfile = nil directives.register("backend.export.css", function(v) cssfile = v end) local function stopexport(v) starttiming(treehash) @@ -3177,6 +3181,8 @@ end local stylefilename = file.join(stylepath,stylefilebase ) local templatefilename = file.join(stylepath,templatefilebase) + local cssfile = finetuning.cssfile + -- we keep track of all used files local files = { @@ -3196,7 +3202,7 @@ end file.copy(examplefilename,defaultfilename) end - if type(cssfile) == "string" then + if cssfile then local list = table.unique(settings_to_array(cssfile)) for i=1,#list do local source = file.addsuffix(list[i],"css") diff --git a/tex/context/base/back-exp.mkiv b/tex/context/base/back-exp.mkiv index 7a9824555..dc510b2a6 100644 --- a/tex/context/base/back-exp.mkiv +++ b/tex/context/base/back-exp.mkiv @@ -165,10 +165,8 @@ \c!alternative=, % html, div \c!properties=\v!no, % no: ignore, yes: as attribute, otherwise: use as prefix \c!hyphen=\v!no, - \c!svgstyle=] - -\setupbackend - [css=] % ? 
+ \c!svgstyle=, + \c!cssfile=] \def\dosynchronizeexport {\let\currentexport\empty @@ -184,6 +182,7 @@ firstpage = "\exportparameter\c!firstpage", lastpage = "\exportparameter\c!lastpage", svgstyle = "\exportparameter\c!svgstyle", + cssfile = "\exportparameter\c!cssfile", }}} \appendtoks @@ -194,14 +193,14 @@ \doifsomething{\backendparameter\c!export}\dosynchronizeexport % in case it is done inside \starttext \to \everysetupdocument -\appendtoks - \doifsomething{\backendparameter\c!xhtml} - {\enabledirectives[backend.export.xhtml=\backendparameter\c!xhtml]}% - \doifsomething{\backendparameter\c!css} - {\enabledirectives[backend.export.css={\backendparameter\c!css}]}% - \doifsomething{\backendparameter\c!alternative} - {\enabledirectives[backend.export.alternative={\backendparameter\c!alternative}]}% -\to \everysetupbackend +% \appendtoks +% \doifsomething{\backendparameter\c!xhtml} +% {\enabledirectives[backend.export.xhtml=\backendparameter\c!xhtml]}% +% \doifsomething{\backendparameter\c!css} +% {\enabledirectives[backend.export.css={\backendparameter\c!css}]}% +% \doifsomething{\backendparameter\c!alternative} +% {\enabledirectives[backend.export.alternative={\backendparameter\c!alternative}]}% +% \to \everysetupbackend \appendtoks \doifelsenothing{\backendparameter\c!export} diff --git a/tex/context/base/back-pdf.mkiv b/tex/context/base/back-pdf.mkiv index c91d2251d..413365539 100644 --- a/tex/context/base/back-pdf.mkiv +++ b/tex/context/base/back-pdf.mkiv @@ -50,7 +50,7 @@ %D These too and most of them will be protected as well: -\pdfminorversion \plussix +\pdfminorversion \plusseven \pdfgentounicode \plusone \let\pdfgentounicode \undefined \newcount\pdfgentounicode \pdfinclusioncopyfonts \plusone \let\pdfinclusioncopyfonts \undefined \newcount\pdfinclusioncopyfonts \pdfinclusionerrorlevel \zerocount \let\pdfinclusionerrorlevel\undefined \newcount\pdfinclusionerrorlevel diff --git a/tex/context/base/char-def.lua b/tex/context/base/char-def.lua index 
84420ae9d..864ca26c4 100644 --- a/tex/context/base/char-def.lua +++ b/tex/context/base/char-def.lua @@ -66888,6 +66888,8 @@ characters.data={ linebreak="al", mathclass="topaccent", mathname="overbracket", + mathextensible="h", + mathfiller="overbracketfill", unicodeslot=0x23B4, }, [0x23B5]={ @@ -66897,6 +66899,8 @@ characters.data={ linebreak="al", mathclass="botaccent", mathname="underbracket", + mathextensible="h", + mathfiller="underbracketfill", unicodeslot=0x23B5, }, [0x23B6]={ @@ -67172,6 +67176,8 @@ characters.data={ linebreak="al", mathclass="topaccent", mathname="overparent", + mathextensible="h", + mathfiller="overparentfill", unicodeslot=0x23DC, }, [0x23DD]={ @@ -67181,6 +67187,8 @@ characters.data={ linebreak="al", mathclass="botaccent", mathname="underparent", + mathextensible="h", + mathfiller="underparentfill", unicodeslot=0x23DD, }, [0x23DE]={ @@ -67190,6 +67198,8 @@ characters.data={ linebreak="al", mathclass="topaccent", mathname="overbrace", + mathextensible="h", + mathfiller="overbracefill", unicodeslot=0x23DE, }, [0x23DF]={ @@ -67199,6 +67209,8 @@ characters.data={ linebreak="al", mathclass="botaccent", mathname="underbrace", + mathextensible="h", + mathfiller="underbracefill", unicodeslot=0x23DF, }, [0x23E0]={ diff --git a/tex/context/base/cont-new.mkiv b/tex/context/base/cont-new.mkiv index 0083e7aee..c58ec05bd 100644 --- a/tex/context/base/cont-new.mkiv +++ b/tex/context/base/cont-new.mkiv @@ -11,7 +11,7 @@ %C therefore copyrighted by \PRAGMA. See mreadme.pdf for %C details. -\newcontextversion{2014.09.27 14:46} +\newcontextversion{2014.10.07 11:14} %D This file is loaded at runtime, thereby providing an excellent place for %D hacks, patches, extensions and new features. @@ -32,6 +32,123 @@ % \inheritmaintextcolor % \to \everymargindatacontent +% This is experimental; if this changes we need to adapt the mb-mp +% style too. It's not in the core yet. 
+ +% \def\ActionY{\blank\analyzenofparlines{\inleftmargin{\analyzednofparlines}}} +% \def\ActionN{\analyzenofparlines{\inleftmargin{\analyzednofparlines}}} +% +% \saveparnumber\ifnum\nofparlines<2 \ActionY\else\ActionN\fi 1.1 nop \crlf +% 1.2 nop \par +% \saveparnumber\ifnum\nofparlines<2 \ActionY\else\ActionN\fi 2.1 nop \par +% \saveparnumber\ifnum\nofparlines<2 \ActionY\else\ActionN\fi 3.1 yes \crlf +% 3.2 nop \crlf +% 3.3 nop \par +% \saveparnumber\ifnum\nofparlines<2 \ActionY\else\ActionN\fi 4.1 nop \crlf +% 4.2 nop \par +% \saveparnumber\ifnum\nofparlines<2 \ActionY\else\ActionN\fi 5.1 nop \par +% \saveparnumber\ifnum\nofparlines<2 \ActionY\else\ActionN\fi 6.1 yes \par +% +% \saveparnumber\doifelselessparlines {2}\ActionY\ActionN 1.1 nop \crlf +% 1.2 nop \par +% \saveparnumber\doifelselessparlines {2}\ActionY\ActionN 2.1 nop \par +% \saveparnumber\doifelselessparlines {2}\ActionY\ActionN 3.1 yes \crlf +% 3.2 nop \crlf +% 3.3 nop \par +% \saveparnumber\doifelselessparlines {2}\ActionY\ActionN 4.1 nop \crlf +% 4.2 nop \par +% \saveparnumber\doifelselessparlines {2}\ActionY\ActionN 5.1 nop \par +% \saveparnumber\doifelselessparlines {2}\ActionY\ActionN 6.1 yes \par + +\newcount \c_typo_par_current +\newcount \c_typo_par_saved +\newconstant\c_typo_par_state +\newconstant\c_typo_par_lines +\newconstant\c_typo_par_criterium + +\appendtoks + \advance\c_typo_par_current\plusone % local +\to \everypar + +\unexpanded\def\saveparnumber + {\c_typo_par_saved\tagparcounter} % local + +\def\savedparnumber {\number\c_typo_par_saved} +\def\currentparnumber{\number\c_typo_par_current} + +\unexpanded\def\nofparlines + {\numexpr + \ifhmode + \maxdimen + \else\ifnum\c_typo_par_current=\c_typo_par_current + % we have not yet started a new one + \prevgraf + \else\ifnum\c_typo_par_current>\c_typo_par_current + % we are somewhere in the future + \maxdimen + \else + \zerocount + \fi\fi\fi + \relax} + +\unexpanded\def\setnofparlinesstate + {\c_typo_par_state\numexpr + \ifhmode + 
\zerocount + \else\ifnum\c_typo_par_current=\c_typo_par_current + % we have not yet started a new one + \plusone + \else\ifnum\c_typo_par_current>\c_typo_par_current + % we are somewhere in the future + \plustwo + \else + \plusthree + \fi\fi\fi + \relax} + +\unexpanded\def\shownofparlines + {\dontleavehmode\hbox\bgroup + \infofont + [% + \number\c_typo_par_current:\number\c_typo_par_current:\space + \ifcase\c_typo_par_state + unknown% + \or + \ifnum\c_typo_par_lines<\c_typo_par_criterium + \darkred + \number\c_typo_par_lines<\number\c_typo_par_criterium + \else + \darkgreen + \number\c_typo_par_lines>=\number\c_typo_par_criterium + \fi + \or + ahead% + \else + behind% + \fi + ]% + \egroup} + +\unexpanded\def\doifelselessparlines#1% + {\c_typo_par_criterium#1\relax + \c_typo_par_lines\prevgraf + \setnofparlinesstate + \ifnum\nofparlines<#1\relax + \expandafter\firstoftwoarguments + \else + \expandafter\secondoftwoarguments + \fi} + +\unexpanded\def\doiflessparlines#1% + {\c_typo_par_criterium#1\relax + \c_typo_par_lines\prevgraf + \setnofparlinesstate + \ifnum\nofparlines<#1\relax + \expandafter\firstofoneargument + \else + \expandafter\gobbleoneargument + \fi} + %D Maybe: \unexpanded\def\tightvbox{\dowithnextbox{\dp\nextbox\zeropoint\box\nextbox}\vbox} diff --git a/tex/context/base/context-version.pdf b/tex/context/base/context-version.pdf index ae0441b81..68d39a739 100644 Binary files a/tex/context/base/context-version.pdf and b/tex/context/base/context-version.pdf differ diff --git a/tex/context/base/context.mkiv b/tex/context/base/context.mkiv index 6e183c88b..08aaa01c0 100644 --- a/tex/context/base/context.mkiv +++ b/tex/context/base/context.mkiv @@ -28,7 +28,7 @@ %D up and the dependencies are more consistent. 
\edef\contextformat {\jobname} -\edef\contextversion{2014.09.27 14:46} +\edef\contextversion{2014.10.07 11:14} \edef\contextkind {beta} %D For those who want to use this: diff --git a/tex/context/base/data-tex.lua b/tex/context/base/data-tex.lua index 04c5ef469..b6b97a0a9 100644 --- a/tex/context/base/data-tex.lua +++ b/tex/context/base/data-tex.lua @@ -77,13 +77,13 @@ function helpers.textopener(tag,filename,filehandle,coding) report_tex("%a opener: %a opened using method %a",tag,filename,coding) end if coding == "utf-16-be" then - lines = utf.utf16_to_utf8_be(lines) + lines = utf.utf16_to_utf8_be_t(lines) elseif coding == "utf-16-le" then - lines = utf.utf16_to_utf8_le(lines) + lines = utf.utf16_to_utf8_le_t(lines) elseif coding == "utf-32-be" then - lines = utf.utf32_to_utf8_be(lines) + lines = utf.utf32_to_utf8_be_t(lines) elseif coding == "utf-32-le" then - lines = utf.utf32_to_utf8_le(lines) + lines = utf.utf32_to_utf8_le_t(lines) else -- utf8 or unknown (could be a mkvi file) local runner = textfileactions.runner if runner then diff --git a/tex/context/base/file-job.lua b/tex/context/base/file-job.lua index 3b67057e0..0d1986463 100644 --- a/tex/context/base/file-job.lua +++ b/tex/context/base/file-job.lua @@ -961,16 +961,24 @@ luatex.registerstopactions(function() logsnewline() report_options("start commandline options") logsnewline() - for argument, value in sortedhash(arguments) do - report_option("%s=%A",argument,value) + if arguments and next(arguments) then + for argument, value in sortedhash(arguments) do + report_option("%s=%A",argument,value) + end + else + report_file("no arguments") end logsnewline() report_options("stop commandline options") logsnewline() report_options("start commandline files") logsnewline() - for i=1,#files do - report_file("% 4i: %s",i,files[i]) + if files and #files > 0 then + for i=1,#files do + report_file("% 4i: %s",i,files[i]) + end + else + report_file("no files") end logsnewline() report_options("stop commandline 
files") diff --git a/tex/context/base/font-afm.lua b/tex/context/base/font-afm.lua index 46ea8a423..ca5616a1e 100644 --- a/tex/context/base/font-afm.lua +++ b/tex/context/base/font-afm.lua @@ -40,6 +40,8 @@ local trace_defining = false trackers.register("fonts.defining", function(v local report_afm = logs.reporter("fonts","afm loading") +local setmetatableindex = table.setmetatableindex + local findbinfile = resolvers.findbinfile local definers = fonts.definers @@ -52,7 +54,7 @@ local pfb = constructors.newhandler("pfb") local afmfeatures = constructors.newfeatures("afm") local registerafmfeature = afmfeatures.register -afm.version = 1.410 -- incrementing this number one up will force a re-cache +afm.version = 1.500 -- incrementing this number one up will force a re-cache afm.cache = containers.define("fonts", "afm", afm.version, true) afm.autoprefixed = true -- this will become false some day (catches texnansi-blabla.*) @@ -62,6 +64,8 @@ afm.addligatures = true -- best leave this set to true afm.addtexligatures = true -- best leave this set to true afm.addkerns = true -- best leave this set to true +local overloads = fonts.mappings.overloads + local applyruntimefixes = fonts.treatments and fonts.treatments.applyfixes local function setmode(tfmdata,value) @@ -315,7 +319,7 @@ by adding ligatures and kern information to the afm derived data. That way we can set them faster when defining a font.

--ldx]]-- -local addkerns, addligatures, addtexligatures, unify, normalize -- we will implement these later +local addkerns, addligatures, addtexligatures, unify, normalize, fixnames -- we will implement these later function afm.load(filename) -- hm, for some reasons not resolved yet @@ -362,6 +366,7 @@ function afm.load(filename) addkerns(data) end normalize(data) + fixnames(data) report_afm("add tounicode data") fonts.mappings.addtounicode(data,filename) data.size = size @@ -369,6 +374,7 @@ function afm.load(filename) data.pfbsize = pfbsize data.pfbtime = pfbtime report_afm("saving %a in cache",name) + data.resources.unicodes = nil -- consistent with otf but here we save not much data = containers.write(afm.cache, name, data) data = containers.read(afm.cache,name) end @@ -432,13 +438,29 @@ unify = function(data, filename) resources.filename = resolvers.unresolve(filename) -- no shortcut resources.unicodes = unicodes -- name to unicode resources.marks = { } -- todo - resources.names = names -- name to index + -- resources.names = names -- name to index resources.private = private end normalize = function(data) end +fixnames = function(data) + for k, v in next, data.descriptions do + local n = v.name + local r = overloads[n] + if r then + local name = r.name + if trace_indexing then + report_afm("renaming characters %a to %a",n,name) + end + v.name = name + v.unicode = r.unicode + end + end +end + + --[[ldx--

These helpers extend the basic table with extra ligatures, texligatures and extra kerns. This saves quite some lookups later.

@@ -449,7 +471,7 @@ local addthem = function(rawdata,ligatures) local descriptions = rawdata.descriptions local resources = rawdata.resources local unicodes = resources.unicodes - local names = resources.names + -- local names = resources.names for ligname, ligdata in next, ligatures do local one = descriptions[unicodes[ligname]] if one then @@ -608,8 +630,8 @@ local function copytotfm(data) local filename = constructors.checkedfilename(resources) local fontname = metadata.fontname or metadata.fullname local fullname = metadata.fullname or metadata.fontname - local endash = unicodes['space'] - local emdash = unicodes['emdash'] + local endash = 0x0020 -- space + local emdash = 0x2014 local spacer = "space" local spaceunits = 500 -- @@ -669,7 +691,7 @@ local function copytotfm(data) parameters.x_height = charxheight else -- same as otf - local x = unicodes['x'] + local x = 0x0078 -- x if x then local x = descriptions[x] if x then @@ -729,7 +751,34 @@ function afm.setfeatures(tfmdata,features) end end -local function checkfeatures(specification) +local function addtables(data) + local resources = data.resources + local lookuptags = resources.lookuptags + local unicodes = resources.unicodes + if not lookuptags then + lookuptags = { } + resources.lookuptags = lookuptags + end + setmetatableindex(lookuptags,function(t,k) + local v = type(k) == "number" and ("lookup " .. k) or k + t[k] = v + return v + end) + if not unicodes then + unicodes = { } + resources.unicodes = unicodes + setmetatableindex(unicodes,function(t,k) + setmetatableindex(unicodes,nil) + for u, d in next, data.descriptions do + local n = d.name + if n then + t[n] = u + end + end + return rawget(t,k) + end) + end + constructors.addcoreunicodes(unicodes) -- do we really need this? 
end local function afmtotfm(specification) @@ -759,6 +808,7 @@ local function afmtotfm(specification) if not tfmdata then local rawdata = afm.load(afmname) if rawdata and next(rawdata) then + addtables(rawdata) adddimensions(rawdata) tfmdata = copytotfm(rawdata) if tfmdata and next(tfmdata) then @@ -808,6 +858,7 @@ those that make sense for this format.

local function prepareligatures(tfmdata,ligatures,value) if value then local descriptions = tfmdata.descriptions + local hasligatures = false for unicode, character in next, tfmdata.characters do local description = descriptions[unicode] local dligatures = description.ligatures @@ -823,17 +874,20 @@ local function prepareligatures(tfmdata,ligatures,value) type = 0 } end + hasligatures = true end end + tfmdata.properties.hasligatures = hasligatures end end local function preparekerns(tfmdata,kerns,value) if value then - local rawdata = tfmdata.shared.rawdata - local resources = rawdata.resources - local unicodes = resources.unicodes + local rawdata = tfmdata.shared.rawdata + local resources = rawdata.resources + local unicodes = resources.unicodes local descriptions = tfmdata.descriptions + local haskerns = false for u, chr in next, tfmdata.characters do local d = descriptions[u] local newkerns = d[kerns] @@ -849,8 +903,10 @@ local function preparekerns(tfmdata,kerns,value) kerns[uk] = v end end + haskerns = true end end + tfmdata.properties.haskerns = haskerns end end diff --git a/tex/context/base/font-agl.lua b/tex/context/base/font-agl.lua index 42a41a15d..122d1adc2 100644 --- a/tex/context/base/font-agl.lua +++ b/tex/context/base/font-agl.lua @@ -656,6 +656,8 @@ end -- We load this table only when needed. We could use a loading mechanism -- return the table but there are no more vectors like this so why bother. +-- +-- Well, we currently hav ethis table preloaded anyway. 
local agl = { names = names, -- unicode -> name diff --git a/tex/context/base/font-con.lua b/tex/context/base/font-con.lua index aca705523..dd4cfa56a 100644 --- a/tex/context/base/font-con.lua +++ b/tex/context/base/font-con.lua @@ -394,7 +394,8 @@ function constructors.scale(tfmdata,specification) targetparameters.forcedsize = forcedsize -- context specific targetparameters.extrafactor = extrafactor -- context specific -- - local tounicode = resources.tounicode + local tounicode = fonts.mappings.tounicode + -- local defaultwidth = resources.defaultwidth or 0 local defaultheight = resources.defaultheight or 0 local defaultdepth = resources.defaultdepth or 0 @@ -500,7 +501,8 @@ function constructors.scale(tfmdata,specification) local autoitalicamount = properties.autoitalicamount local stackmath = not properties.nostackmath local nonames = properties.noglyphnames - local nodemode = properties.mode == "node" + local haskerns = properties.haskerns or properties.mode == "base" -- we can have afm in node mode + local hasligatures = properties.hasligatures or properties.mode == "base" -- we can have afm in node mode -- if changed and not next(changed) then changed = false @@ -594,39 +596,20 @@ function constructors.scale(tfmdata,specification) -- we can have a dumb mode (basemode without math etc) that skips most -- for unicode, character in next, characters do - local chr, description, index, touni + local chr, description, index if changed then - -- basemode hack (we try to catch missing tounicodes, e.g. needed for ssty in math cambria) local c = changed[unicode] if c then - local ligatures = character.ligatures -- the original ligatures (as we cannot rely on remapping) description = descriptions[c] or descriptions[unicode] or character character = characters[c] or character index = description.index or c - if tounicode then - touni = tounicode[index] -- nb: index! 
- if not touni then -- goodie - local d = descriptions[unicode] or characters[unicode] - local i = d.index or unicode - touni = tounicode[i] -- nb: index! - end - end - if ligatures and not character.ligatures then - character.ligatures = ligatures -- the original targets (for now at least.. see libertine smallcaps) - end else description = descriptions[unicode] or character index = description.index or unicode - if tounicode then - touni = tounicode[index] -- nb: index! - end end else description = descriptions[unicode] or character index = description.index or unicode - if tounicode then - touni = tounicode[index] -- nb: index! - end end local width = description.width local height = description.height @@ -669,8 +652,10 @@ function constructors.scale(tfmdata,specification) } end end - if touni then - chr.tounicode = touni + local isunicode = description.unicode + if isunicode then + chr.unicode = isunicode + chr.tounicode = tounicode(isunicode) end if hasquality then -- we could move these calculations elsewhere (saves calculations) @@ -767,7 +752,7 @@ function constructors.scale(tfmdata,specification) end end end - if not nodemode then + if haskerns then local vk = character.kerns if vk then local s = sharedkerns[vk] @@ -778,6 +763,8 @@ function constructors.scale(tfmdata,specification) end chr.kerns = s end + end + if hasligatures then local vl = character.ligatures if vl then if true then @@ -1362,3 +1349,50 @@ function constructors.applymanipulators(what,tfmdata,features,trace,report) end end end + +function constructors.addcoreunicodes(unicodes) -- maybe make this a metatable if used at all + if not unicodes then + unicodes = { } + end + unicodes.space = 0x0020 + unicodes.hyphen = 0x002D + unicodes.zwj = 0x200D + unicodes.zwnj = 0x200C + return unicodes +end + +-- -- keep for a while: old tounicode code +-- +-- if changed then +-- -- basemode hack (we try to catch missing tounicodes, e.g. 
needed for ssty in math cambria) +-- local c = changed[unicode] +-- if c then +-- -- local ligatures = character.ligatures -- the original ligatures (as we cannot rely on remapping) +-- description = descriptions[c] or descriptions[unicode] or character +-- character = characters[c] or character +-- index = description.index or c +-- if tounicode then +-- touni = tounicode[index] -- nb: index! +-- if not touni then -- goodie +-- local d = descriptions[unicode] or characters[unicode] +-- local i = d.index or unicode +-- touni = tounicode[i] -- nb: index! +-- end +-- end +-- -- if ligatures and not character.ligatures then +-- -- character.ligatures = ligatures -- the original targets (for now at least.. see libertine smallcaps) +-- -- end +-- else +-- description = descriptions[unicode] or character +-- index = description.index or unicode +-- if tounicode then +-- touni = tounicode[index] -- nb: index! +-- end +-- end +-- else +-- description = descriptions[unicode] or character +-- index = description.index or unicode +-- if tounicode then +-- touni = tounicode[index] -- nb: index! 
+-- end +-- end diff --git a/tex/context/base/font-ctx.lua b/tex/context/base/font-ctx.lua index 5920501dd..51f152baf 100644 --- a/tex/context/base/font-ctx.lua +++ b/tex/context/base/font-ctx.lua @@ -57,6 +57,8 @@ local helpers = fonts.helpers local hashes = fonts.hashes local currentfont = font.current +local aglunicodes = fonts.encodings.agl.unicodes + local nuts = nodes.nuts local tonut = nuts.tonut @@ -82,6 +84,7 @@ local characters = hashes.characters local descriptions = hashes.descriptions local properties = hashes.properties local resources = hashes.resources +local unicodes = hashes.unicodes local csnames = hashes.csnames local lastmathids = hashes.lastmathids local exheights = hashes.exheights @@ -89,6 +92,9 @@ local emwidths = hashes.emwidths local designsizefilename = fontgoodies.designsizes.filename +local context_char = context.char +local context_getvalue = context.getvalue + local otffeatures = otf.features local otftables = otf.tables @@ -1439,12 +1445,27 @@ mappings.reset() -- resets the default file -- => commands + local function nametoslot(name) local t = type(name) + local s = nil if t == "string" then - return resources[true].unicodes[name] + local slot = unicodes[true][name] + if slot then + return slot + end + slot = aglunicodes[name] + if characters[true][slot] then + return slot + else + -- not in font + end elseif t == "number" then - return n + if characters[true][name] then + return slot + else + -- not in font + end end end @@ -1472,14 +1493,14 @@ do -- else too many locals local entities = characters.entities local lowered = { } -- delayed initialization - table.setmetatableindex(lowered,function(t,k) + setmetatableindex(lowered,function(t,k) for k, v in next, entities do local l = lower(k) if not entities[l] then lowered[l] = v end end - table.setmetatableindex(lowered,nil) + setmetatableindex(lowered,nil) return lowered[k] end) @@ -1523,7 +1544,7 @@ do -- else too many locals -- -- nicer: -- - -- 
table.setmetatableindex(methods,function(t,k) return methods.c end) + -- setmetatableindex(methods,function(t,k) return methods.c end) -- -- local splitter = (C(1) * P(":") + Cc("c")) * C(P(1)^1) / function(method,name) -- return methods[method](name) @@ -1712,9 +1733,6 @@ end -- interfaces -local context_char = context.char -local context_getvalue = context.getvalue - local commands_doifelse = commands.doifelse function commands.doifelsecurrentfonthasfeature(name) -- can be made faster with a supportedfeatures hash diff --git a/tex/context/base/font-enh.lua b/tex/context/base/font-enh.lua index 2bf0741f5..3439a434a 100644 --- a/tex/context/base/font-enh.lua +++ b/tex/context/base/font-enh.lua @@ -114,24 +114,24 @@ local registerotffeature = otffeatures.register -- unicodes = { -- a1 = 0x2701, -local tosixteen = fonts.mappings.tounicode16 +----- tosixteen = fonts.mappings.tounicode16 local function initializeunicoding(tfmdata) local goodies = tfmdata.goodies local newcoding = nil - local tounicode = false + -- local tounicode = false for i=1,#goodies do local remapping = goodies[i].remapping if remapping and remapping.unicodes then - newcoding = remapping.unicodes -- names to unicodes - tounicode = remapping.tounicode + newcoding = remapping.unicodes -- names to unicodes + -- tounicode = remapping.tounicode -- not used end end if newcoding then local characters = tfmdata.characters local descriptions = tfmdata.descriptions local oldcoding = tfmdata.resources.unicodes - local tounicodes = tfmdata.resources.tounicode -- index to unicode + -- local tounicodes = tfmdata.resources.tounicode -- index to unicode local originals = { } for name, newcode in next, newcoding do local oldcode = oldcoding[name] @@ -153,15 +153,15 @@ local function initializeunicoding(tfmdata) else oldcoding[name] = newcode end - if tounicode then - local description = descriptions[newcode] - if description then - local index = description.index - if not tounicodes[index] then - tounicodes[index] 
= tosixteen(newcode) -- shared (we could have a metatable) - end - end - end + -- if tounicode then + -- local description = descriptions[newcode] + -- if description then + -- local index = description.index + -- if not tounicodes[index] then + -- tounicodes[index] = tosixteen(newcode) -- shared (we could have a metatable) + -- end + -- end + -- end if trace_unicoding then if oldcode then report_unicoding("aliasing glyph %a from %U to %U",name,oldcode,newcode) diff --git a/tex/context/base/font-ext.lua b/tex/context/base/font-ext.lua index ede2151d6..68dab3c46 100644 --- a/tex/context/base/font-ext.lua +++ b/tex/context/base/font-ext.lua @@ -328,8 +328,10 @@ local function map_opbd_onto_protrusion(tfmdata,value,opbd) local characters = tfmdata.characters local descriptions = tfmdata.descriptions local properties = tfmdata.properties + local resources = tfmdata.resources local rawdata = tfmdata.shared.rawdata local lookuphash = rawdata.lookuphash + local lookuptags = resources.lookuptags local script = properties.script local language = properties.language local done, factor, left, right = false, 1, 1, 1 @@ -349,14 +351,14 @@ local function map_opbd_onto_protrusion(tfmdata,value,opbd) local data = lookuphash[lookup] if data then if trace_protrusion then - report_protrusions("setting left using lfbd lookup %a",lookup) + report_protrusions("setting left using lfbd lookup %a",lookuptags[lookup]) end for k, v in next, data do -- local p = - v[3] / descriptions[k].width-- or 1 ~= 0 too but the same local p = - (v[1] / 1000) * factor * left characters[k].left_protruding = p if trace_protrusion then - report_protrusions("lfbd -> %s -> %C -> %0.03f (% t)",lookup,k,p,v) + report_protrusions("lfbd -> %s -> %C -> %0.03f (% t)",lookuptags[lookup],k,p,v) end end done = true @@ -372,14 +374,14 @@ local function map_opbd_onto_protrusion(tfmdata,value,opbd) local data = lookuphash[lookup] if data then if trace_protrusion then - report_protrusions("setting right using rtbd lookup 
%a",lookup) + report_protrusions("setting right using rtbd lookup %a",lookuptags[lookup]) end for k, v in next, data do -- local p = v[3] / descriptions[k].width -- or 3 local p = (v[1] / 1000) * factor * right characters[k].right_protruding = p if trace_protrusion then - report_protrusions("rtbd -> %s -> %C -> %0.03f (% t)",lookup,k,p,v) + report_protrusions("rtbd -> %s -> %C -> %0.03f (% t)",lookuptags[lookup],k,p,v) end end end diff --git a/tex/context/base/font-gds.lua b/tex/context/base/font-gds.lua index 9e7cb841e..c2c506b7a 100644 --- a/tex/context/base/font-gds.lua +++ b/tex/context/base/font-gds.lua @@ -853,7 +853,7 @@ local function setkeepligatures(tfmdata,value) if letterspacing then local keptligatures = letterspacing.keptligatures if keptligatures then - local unicodes = tfmdata.resources.unicodes + local unicodes = tfmdata.resources.unicodes -- so we accept names local hash = { } for k, v in next, keptligatures do local u = unicodes[k] diff --git a/tex/context/base/font-hsh.lua b/tex/context/base/font-hsh.lua index 1b0dd08b8..2be84165a 100644 --- a/tex/context/base/font-hsh.lua +++ b/tex/context/base/font-hsh.lua @@ -35,6 +35,7 @@ local italics = hashes.italics or allocate() local lastmathids = hashes.lastmathids or allocate() local dynamics = hashes.dynamics or allocate() local unicodes = hashes.unicodes or allocate() +local originals = hashes.originals or allocate() hashes.characters = characters hashes.descriptions = descriptions @@ -52,6 +53,7 @@ hashes.italics = italics hashes.lastmathids = lastmathids hashes.dynamics = dynamics hashes.unicodes = unicodes +hashes.originals = originals local nodepool = nodes.pool local dummyglyph = nodepool.register(nodepool.glyph()) @@ -261,21 +263,31 @@ setmetatableindex(dynamics, function(t,k) end end) -setmetatableindex(unicodes, function(t,k) +setmetatableindex(unicodes, function(t,k) -- always a unicode + if k == true then + return unicodes[currentfont()] + else + local resources = resources[k] + local 
unicodes = resources and resources.unicodes or { } + t[k] = unicodes + return unicodes + end +end) + +setmetatableindex(originals, function(t,k) -- always a unicode if k == true then return originals[currentfont()] else - local resources = resources[k] - local originals = resources and resources.originals or { } - local characters = characters[k] - local unicodes = { } - setmetatableindex(unicodes,function(t,k) - local v = originals[characters[k].index] or k - t[k] = v + local resolved = { } + setmetatableindex(resolved,function(t,name) + local u = unicodes[k][name] + local d = u and descriptions[k][u] + local v = d and d.unicode or u or 0 -- so we return notdef (at least for the moment) + t[name] = u return v end) - t[k] = unicodes - return unicodes + t[k] = resolved + return resolved end end) diff --git a/tex/context/base/font-ini.mkvi b/tex/context/base/font-ini.mkvi index c427c2f89..f174b132b 100644 --- a/tex/context/base/font-ini.mkvi +++ b/tex/context/base/font-ini.mkvi @@ -2170,7 +2170,7 @@ %D Handy for manuals: \unexpanded\def\fontchar#character% - {\ctxcommand{fontchar("#character")}} + {\ctxcommand{fontchar(\!!bs#character\!!es)}} \unexpanded\def\fontcharbyindex#index% unofficial command, for idris' font building {\ctxcommand{fontcharbyindex(\number#index)}} @@ -2190,12 +2190,12 @@ %D This is an expandable command! \def\tochar#specifications% - {\ctxcommand{tochar("#specifications")}} % expanded (also used in edef) + {\ctxcommand{tochar(\!!bs#specifications\!!es)}} % expanded (also used in edef) %D The next auxilliary macro is an alternative to \type %D {\fontname}. 
-\def\purefontname#font{\ctxcommand{purefontname("\fontname#font")}} +\def\purefontname#font{\ctxcommand{purefontname(\!!bs\fontname#font\!!es)}} %def\purefontname#font{\ctxcommand{purefontname(\number\fontid#font)}} %D \macros diff --git a/tex/context/base/font-map.lua b/tex/context/base/font-map.lua index 429c73597..890e47d3f 100644 --- a/tex/context/base/font-map.lua +++ b/tex/context/base/font-map.lua @@ -6,12 +6,13 @@ if not modules then modules = { } end modules ['font-map'] = { license = "see context related readme files" } -local tonumber = tonumber +local tonumber, next, type = tonumber, next, type local match, format, find, concat, gsub, lower = string.match, string.format, string.find, table.concat, string.gsub, string.lower local P, R, S, C, Ct, Cc, lpegmatch = lpeg.P, lpeg.R, lpeg.S, lpeg.C, lpeg.Ct, lpeg.Cc, lpeg.match local utfbyte = utf.byte local floor = math.floor +local formatters = string.formatters local trace_loading = false trackers.register("fonts.loading", function(v) trace_loading = v end) local trace_mapping = false trackers.register("fonts.mapping", function(v) trace_unimapping = v end) @@ -66,11 +67,14 @@ local function makenameparser(str) end end +local f_single = formatters["%04X"] +local f_double = formatters["%04X%04X"] + local function tounicode16(unicode,name) if unicode < 0x10000 then - return format("%04X",unicode) + return f_single(unicode) elseif unicode < 0x1FFFFFFFFF then - return format("%04X%04X",floor(unicode/1024),unicode%1024+0xDC00) + return f_double(floor(unicode/1024),unicode%1024+0xDC00) else report_fonts("can't convert %a in %a into tounicode",unicode,name) end @@ -79,18 +83,46 @@ end local function tounicode16sequence(unicodes,name) local t = { } for l=1,#unicodes do - local unicode = unicodes[l] - if unicode < 0x10000 then - t[l] = format("%04X",unicode) + local u = unicodes[l] + if u < 0x10000 then + t[l] = f_single(u) elseif unicode < 0x1FFFFFFFFF then - t[l] = 
format("%04X%04X",floor(unicode/1024),unicode%1024+0xDC00) + t[l] = f_double(floor(u/1024),u%1024+0xDC00) else - report_fonts ("can't convert %a in %a into tounicode",unicode,name) + report_fonts ("can't convert %a in %a into tounicode",u,name) + return end end return concat(t) end +local function tounicode(unicode,name) + if type(unicode) == "table" then + local t = { } + for l=1,#unicode do + local u = unicode[l] + if u < 0x10000 then + t[l] = f_single(u) + elseif u < 0x1FFFFFFFFF then + t[l] = f_double(floor(u/1024),u%1024+0xDC00) + else + report_fonts ("can't convert %a in %a into tounicode",u,name) + return + end + end + return concat(t) + else + if unicode < 0x10000 then + return f_single(unicode) + elseif unicode < 0x1FFFFFFFFF then + return f_double(floor(unicode/1024),unicode%1024+0xDC00) + else + report_fonts("can't convert %a in %a into tounicode",unicode,name) + end + end +end + + local function fromunicode16(str) if #str == 4 then return tonumber(str,16) @@ -136,6 +168,7 @@ end mappings.loadlumtable = loadlumtable mappings.makenameparser = makenameparser +mappings.tounicode = tounicode mappings.tounicode16 = tounicode16 mappings.tounicode16sequence = tounicode16sequence mappings.fromunicode16 = fromunicode16 @@ -158,6 +191,36 @@ local namesplitter = Ct(C((1 - ligseparator - varseparator)^1) * (ligseparator * -- test("such_so_more") -- test("such_so_more.that") +-- to be completed .. 
for fonts that use unicodes for ligatures which +-- is a actually a bad thing and should be avoided in the first place + +local overloads = { + IJ = { name = "I_J", unicode = { 0x49, 0x4A }, mess = 0x0132 }, + ij = { name = "i_j", unicode = { 0x69, 0x6A }, mess = 0x0133 }, + ff = { name = "f_f", unicode = { 0x66, 0x66 }, mess = 0xFB00 }, + fi = { name = "f_i", unicode = { 0x66, 0x69 }, mess = 0xFB01 }, + fl = { name = "f_l", unicode = { 0x66, 0x6C }, mess = 0xFB02 }, + ffi = { name = "f_f_i", unicode = { 0x66, 0x66, 0x69 }, mess = 0xFB03 }, + ffl = { name = "f_f_l", unicode = { 0x66, 0x66, 0x6C }, mess = 0xFB04 }, + fj = { name = "f_j", unicode = { 0x66, 0x6A } }, + fk = { name = "f_k", unicode = { 0x66, 0x6B } }, +} + +require("char-ini") + +for k, v in next, overloads do + local name = v.name + local mess = v.mess + if name then + overloads[name] = v + end + if mess then + overloads[mess] = v + end +end + +mappings.overloads = overloads + function mappings.addtounicode(data,filename) local resources = data.resources local properties = data.properties @@ -168,26 +231,16 @@ function mappings.addtounicode(data,filename) return end -- we need to move this code - unicodes['space'] = unicodes['space'] or 32 - unicodes['hyphen'] = unicodes['hyphen'] or 45 - unicodes['zwj'] = unicodes['zwj'] or 0x200D - unicodes['zwnj'] = unicodes['zwnj'] or 0x200C - -- the tounicode mapping is sparse and only needed for alternatives + unicodes['space'] = unicodes['space'] or 32 + unicodes['hyphen'] = unicodes['hyphen'] or 45 + unicodes['zwj'] = unicodes['zwj'] or 0x200D + unicodes['zwnj'] = unicodes['zwnj'] or 0x200C local private = fonts.constructors.privateoffset - local unknown = format("%04X",utfbyte("?")) local unicodevector = fonts.encodings.agl.unicodes -- loaded runtime in context ----- namevector = fonts.encodings.agl.names -- loaded runtime in context - local tounicode = { } - local originals = { } local missing = { } - resources.tounicode = tounicode - resources.originals = 
originals local lumunic, uparser, oparser local cidinfo, cidnames, cidcodes, usedmap - if false then -- will become an option - lumunic = loadlumtable(filename) - lumunic = lumunic and lumunic.tounicode - end -- cidinfo = properties.cidinfo usedmap = cidinfo and fonts.cid.getmap(cidinfo) @@ -202,12 +255,16 @@ function mappings.addtounicode(data,filename) for unic, glyph in next, descriptions do local index = glyph.index local name = glyph.name - if unic == -1 or unic >= private or (unic >= 0xE000 and unic <= 0xF8FF) or unic == 0xFFFE or unic == 0xFFFF then + local r = overloads[name] + if r then + -- get rid of weird ligatures + -- glyph.name = r.name + glyph.unicode = r.unicode + elseif unic == -1 or unic >= private or (unic >= 0xE000 and unic <= 0xF8FF) or unic == 0xFFFE or unic == 0xFFFF then local unicode = lumunic and lumunic[name] or unicodevector[name] if unicode then - originals[index] = unicode - tounicode[index] = tounicode16(unicode,name) - ns = ns + 1 + glyph.unicode = unicode + ns = ns + 1 end -- cidmap heuristics, beware, there is no guarantee for a match unless -- the chain resolves @@ -216,9 +273,8 @@ function mappings.addtounicode(data,filename) if foundindex then unicode = cidcodes[foundindex] -- name to number if unicode then - originals[index] = unicode - tounicode[index] = tounicode16(unicode,name) - ns = ns + 1 + glyph.unicode = unicode + ns = ns + 1 else local reference = cidnames[foundindex] -- number to name if reference then @@ -226,23 +282,20 @@ function mappings.addtounicode(data,filename) if foundindex then unicode = cidcodes[foundindex] if unicode then - originals[index] = unicode - tounicode[index] = tounicode16(unicode,name) - ns = ns + 1 + glyph.unicode = unicode + ns = ns + 1 end end if not unicode or unicode == "" then local foundcodes, multiple = lpegmatch(uparser,reference) if foundcodes then - originals[index] = foundcodes + glyph.unicode = foundcodes if multiple then - tounicode[index] = tounicode16sequence(foundcodes) - nl = 
nl + 1 - unicode = true + nl = nl + 1 + unicode = true else - tounicode[index] = tounicode16(foundcodes,name) - ns = ns + 1 - unicode = foundcodes + ns = ns + 1 + unicode = foundcodes end end end @@ -289,11 +342,9 @@ function mappings.addtounicode(data,filename) if n == 0 then -- done then -- nothing elseif n == 1 then - originals[index] = t[1] - tounicode[index] = tounicode16(t[1],name) + glyph.unicode = t[1] else - originals[index] = t - tounicode[index] = tounicode16sequence(t) + glyph.unicode = t end nl = nl + 1 end @@ -301,32 +352,29 @@ function mappings.addtounicode(data,filename) if not unicode or unicode == "" then local foundcodes, multiple = lpegmatch(uparser,name) if foundcodes then + glyph.unicode = foundcodes if multiple then - originals[index] = foundcodes - tounicode[index] = tounicode16sequence(foundcodes,name) - nl = nl + 1 - unicode = true + nl = nl + 1 + unicode = true else - originals[index] = foundcodes - tounicode[index] = tounicode16(foundcodes,name) - ns = ns + 1 - unicode = foundcodes + ns = ns + 1 + unicode = foundcodes end end end -- check using substitutes and alternates + local r = overloads[unicode] + if r then + unicode = r.unicode + glyph.unicode = unicode + end -- if not unicode then missing[name] = true end - -- if not unicode then - -- originals[index] = 0xFFFD - -- tounicode[index] = "FFFD" - -- end end end if next(missing) then --- inspect(missing) local guess = { } -- helper local function check(gname,code,unicode) @@ -344,12 +392,15 @@ function mappings.addtounicode(data,filename) return end -- the variant already has a tounicode - local index = descriptions[code].index - if tounicode[index] then + if descriptions[code].unicode then return end -- add to the list local g = guess[variant] + -- local r = overloads[unicode] + -- if r then + -- unicode = r.unicode + -- end if g then g[gname] = unicode else @@ -413,52 +464,51 @@ function mappings.addtounicode(data,filename) end end end - -- generate tounicodes + -- wrap up + local 
orphans = 0 + local guessed = 0 for k, v in next, guess do if type(v) == "number" then - guess[k] = tounicode16(v) + descriptions[unicodes[k]].unicode = descriptions[v].unicode or v -- can also be a table + guessed = guessed + 1 else local t = nil local l = lower(k) local u = unicodes[l] if not u then - -- forget about it + orphans = orphans + 1 elseif u == -1 or u >= private or (u >= 0xE000 and u <= 0xF8FF) or u == 0xFFFE or u == 0xFFFF then - t = tounicode[descriptions[u].index] - else - -- t = u - end - if t then - guess[k] = t + local unicode = descriptions[u].unicode + if unicode then + descriptions[unicodes[k]].unicode = unicode + guessed = guessed + 1 + else + orphans = orphans + 1 + end else - guess[k] = "FFFD" + orphans = orphans + 1 end end end - local orphans = 0 - local guessed = 0 - for k, v in next, guess do - tounicode[descriptions[unicodes[k]].index] = v - if v == "FFFD" then - orphans = orphans + 1 - guess[k] = false - else - guessed = guessed + 1 - guess[k] = true - end - end - -- resources.nounicode = guess -- only when we test things if trace_loading and orphans > 0 or guessed > 0 then report_fonts("%s glyphs with no related unicode, %s guessed, %s orphans",guessed+orphans,guessed,orphans) end end if trace_mapping then for unic, glyph in table.sortedhash(descriptions) do - local name = glyph.name - local index = glyph.index - local toun = tounicode[index] - if toun then - report_fonts("internal slot %U, name %a, unicode %U, tounicode %a",index,name,unic,toun) + local name = glyph.name + local index = glyph.index + local unicode = glyph.unicode + if unicode then + if type(unicode) == "table" then + local unicodes = { } + for i=1,#unicode do + unicodes[i] = formatters("%U",unicode[i]) + end + report_fonts("internal slot %U, name %a, unicode %U, tounicode % t",index,name,unic,unicodes) + else + report_fonts("internal slot %U, name %a, unicode %U, tounicode %U",index,name,unic,unicode) + end else report_fonts("internal slot %U, name %a, unicode 
%U",index,name,unic) end diff --git a/tex/context/base/font-mis.lua b/tex/context/base/font-mis.lua index b934837f4..22f4ccc58 100644 --- a/tex/context/base/font-mis.lua +++ b/tex/context/base/font-mis.lua @@ -22,7 +22,7 @@ local handlers = fonts.handlers handlers.otf = handlers.otf or { } local otf = handlers.otf -otf.version = otf.version or 2.762 +otf.version = otf.version or 2.802 otf.cache = otf.cache or containers.define("fonts", "otf", otf.version, true) function otf.loadcached(filename,format,sub) diff --git a/tex/context/base/font-nod.lua b/tex/context/base/font-nod.lua index 2311cebeb..da3d9def9 100644 --- a/tex/context/base/font-nod.lua +++ b/tex/context/base/font-nod.lua @@ -407,16 +407,18 @@ local function toutf(list,result,nofresult,stopcriterium) if fc then local fcc = fc[c] if fcc then - -- == fromunicode - local u = fcc.tounicode - if u then - for s in gmatch(u,"....") do + local u = fcc.unicode + if not u then + nofresult = nofresult + 1 + result[nofresult] = utfchar(c) + elseif type(u) == "table" then + for i=1,#u do nofresult = nofresult + 1 - result[nofresult] = utfchar(tonumber(s,16)) + result[nofresult] = utfchar(u[i]) end else nofresult = nofresult + 1 - result[nofresult] = utfchar(c) + result[nofresult] = utfchar(u) end else nofresult = nofresult + 1 diff --git a/tex/context/base/font-otb.lua b/tex/context/base/font-otb.lua index a68b57c8a..4e955a197 100644 --- a/tex/context/base/font-otb.lua +++ b/tex/context/base/font-otb.lua @@ -7,7 +7,7 @@ if not modules then modules = { } end modules ['font-otb'] = { } local concat = table.concat local format, gmatch, gsub, find, match, lower, strip = string.format, string.gmatch, string.gsub, string.find, string.match, string.lower, string.strip -local type, next, tonumber, tostring = type, next, tonumber, tostring +local type, next, tonumber, tostring, rawget = type, next, tonumber, tostring, rawget local lpegmatch = lpeg.match local utfchar = utf.char @@ -63,40 +63,40 @@ local function 
gref(descriptions,n) end end -local function cref(feature,lookupname) +local function cref(feature,lookuptags,lookupname) if lookupname then - return formatters["feature %a, lookup %a"](feature,lookupname) + return formatters["feature %a, lookup %a"](feature,lookuptags[lookupname]) else return formatters["feature %a"](feature) end end -local function report_alternate(feature,lookupname,descriptions,unicode,replacement,value,comment) +local function report_alternate(feature,lookuptags,lookupname,descriptions,unicode,replacement,value,comment) report_prepare("%s: base alternate %s => %s (%S => %S)", - cref(feature,lookupname), + cref(feature,lookuptags,lookupname), gref(descriptions,unicode), replacement and gref(descriptions,replacement), value, comment) end -local function report_substitution(feature,lookupname,descriptions,unicode,substitution) +local function report_substitution(feature,lookuptags,lookupname,descriptions,unicode,substitution) report_prepare("%s: base substitution %s => %S", - cref(feature,lookupname), + cref(feature,lookuptags,lookupname), gref(descriptions,unicode), gref(descriptions,substitution)) end -local function report_ligature(feature,lookupname,descriptions,unicode,ligature) +local function report_ligature(feature,lookuptags,lookupname,descriptions,unicode,ligature) report_prepare("%s: base ligature %s => %S", - cref(feature,lookupname), + cref(feature,lookuptags,lookupname), gref(descriptions,ligature), gref(descriptions,unicode)) end -local function report_kern(feature,lookupname,descriptions,unicode,otherunicode,value) +local function report_kern(feature,lookuptags,lookupname,descriptions,unicode,otherunicode,value) report_prepare("%s: base kern %s + %s => %S", - cref(feature,lookupname), + cref(feature,lookuptags,lookupname), gref(descriptions,unicode), gref(descriptions,otherunicode), value) @@ -181,7 +181,7 @@ local function finalize_ligatures(tfmdata,ligatures) local characters = tfmdata.characters local descriptions = 
tfmdata.descriptions local resources = tfmdata.resources - local unicodes = resources.unicodes + local unicodes = resources.unicodes -- we use rawget in order to avoid bulding the table local private = resources.private local alldone = false while not alldone do @@ -217,12 +217,12 @@ local function finalize_ligatures(tfmdata,ligatures) local secondname = firstname .. "_" .. secondcode if i == size - 1 then target = unicode - if not unicodes[secondname] then + if not rawget(unicodes,secondname) then unicodes[secondname] = unicode -- map final ligature onto intermediates end okay = true else - target = unicodes[secondname] + target = rawget(unicodes,secondname) if not target then break end @@ -258,6 +258,7 @@ local function finalize_ligatures(tfmdata,ligatures) end end resources.private = private + return true end end @@ -265,10 +266,11 @@ local function preparesubstitutions(tfmdata,feature,value,validlookups,lookuplis local characters = tfmdata.characters local descriptions = tfmdata.descriptions local resources = tfmdata.resources + local properties = tfmdata.properties local changed = tfmdata.changed - local unicodes = resources.unicodes local lookuphash = resources.lookuphash local lookuptypes = resources.lookuptypes + local lookuptags = resources.lookuptags local ligatures = { } local alternate = tonumber(value) or true and 1 @@ -279,39 +281,39 @@ local function preparesubstitutions(tfmdata,feature,value,validlookups,lookuplis local trace_ligatures = trace_baseinit and trace_ligatures local actions = { - substitution = function(lookupdata,lookupname,description,unicode) + substitution = function(lookupdata,lookuptags,lookupname,description,unicode) if trace_singles then - report_substitution(feature,lookupname,descriptions,unicode,lookupdata) + report_substitution(feature,lookuptags,lookupname,descriptions,unicode,lookupdata) end changed[unicode] = lookupdata end, - alternate = function(lookupdata,lookupname,description,unicode) + alternate = 
function(lookupdata,lookuptags,lookupname,description,unicode) local replacement = lookupdata[alternate] if replacement then changed[unicode] = replacement if trace_alternatives then - report_alternate(feature,lookupname,descriptions,unicode,replacement,value,"normal") + report_alternate(feature,lookuptags,lookupname,descriptions,unicode,replacement,value,"normal") end elseif defaultalt == "first" then replacement = lookupdata[1] changed[unicode] = replacement if trace_alternatives then - report_alternate(feature,lookupname,descriptions,unicode,replacement,value,defaultalt) + report_alternate(feature,lookuptags,lookupname,descriptions,unicode,replacement,value,defaultalt) end elseif defaultalt == "last" then replacement = lookupdata[#data] if trace_alternatives then - report_alternate(feature,lookupname,descriptions,unicode,replacement,value,defaultalt) + report_alternate(feature,lookuptags,lookupname,descriptions,unicode,replacement,value,defaultalt) end else if trace_alternatives then - report_alternate(feature,lookupname,descriptions,unicode,replacement,value,"unknown") + report_alternate(feature,lookuptags,lookupname,descriptions,unicode,replacement,value,"unknown") end end end, - ligature = function(lookupdata,lookupname,description,unicode) + ligature = function(lookupdata,lookuptags,lookupname,description,unicode) if trace_ligatures then - report_ligature(feature,lookupname,descriptions,unicode,lookupdata) + report_ligature(feature,lookuptags,lookupname,descriptions,unicode,lookupdata) end ligatures[#ligatures+1] = { unicode, lookupdata } end, @@ -328,7 +330,7 @@ local function preparesubstitutions(tfmdata,feature,value,validlookups,lookuplis local lookuptype = lookuptypes[lookupname] local action = actions[lookuptype] if action then - action(lookupdata,lookupname,description,unicode) + action(lookupdata,lookuptags,lookupname,description,unicode) end end end @@ -343,24 +345,25 @@ local function 
preparesubstitutions(tfmdata,feature,value,validlookups,lookuplis local action = actions[lookuptype] if action then for i=1,#lookuplist do - action(lookuplist[i],lookupname,description,unicode) + action(lookuplist[i],lookuptags,lookupname,description,unicode) end end end end end end - - finalize_ligatures(tfmdata,ligatures) + properties.hasligatures = finalize_ligatures(tfmdata,ligatures) end local function preparepositionings(tfmdata,feature,value,validlookups,lookuplist) -- todo what kind of kerns, currently all local characters = tfmdata.characters local descriptions = tfmdata.descriptions local resources = tfmdata.resources - local unicodes = resources.unicodes + local properties = tfmdata.properties + local lookuptags = resources.lookuptags local sharedkerns = { } local traceindeed = trace_baseinit and trace_kerns + local haskerns = false for unicode, character in next, characters do local description = descriptions[unicode] local rawkerns = description.kerns -- shared @@ -384,13 +387,13 @@ local function preparepositionings(tfmdata,feature,value,validlookups,lookuplist newkerns = { [otherunicode] = value } done = true if traceindeed then - report_kern(feature,lookup,descriptions,unicode,otherunicode,value) + report_kern(feature,lookuptags,lookup,descriptions,unicode,otherunicode,value) end elseif not newkerns[otherunicode] then -- first wins newkerns[otherunicode] = value done = true if traceindeed then - report_kern(feature,lookup,descriptions,unicode,otherunicode,value) + report_kern(feature,lookuptags,lookup,descriptions,unicode,otherunicode,value) end end end @@ -399,12 +402,14 @@ local function preparepositionings(tfmdata,feature,value,validlookups,lookuplist if done then sharedkerns[rawkerns] = newkerns character.kerns = newkerns -- no empty assignments + haskerns = true else sharedkerns[rawkerns] = false end end end end + properties.haskerns = haskerns end basemethods.independent = { @@ -434,13 +439,13 @@ local function make_1(present,tree,name) end 
end -local function make_2(present,tfmdata,characters,tree,name,preceding,unicode,done,lookupname) +local function make_2(present,tfmdata,characters,tree,name,preceding,unicode,done,lookuptags,lookupname) for k, v in next, tree do if k == "ligature" then local character = characters[preceding] if not character then if trace_baseinit then - report_prepare("weird ligature in lookup %a, current %C, preceding %C",lookupname,v,preceding) + report_prepare("weird ligature in lookup %a, current %C, preceding %C",lookuptags[lookupname],v,preceding) end character = makefake(tfmdata,name,present) end @@ -461,7 +466,7 @@ local function make_2(present,tfmdata,characters,tree,name,preceding,unicode,don else local code = present[name] or unicode local name = name .. "_" .. k - make_2(present,tfmdata,characters,v,name,code,k,done,lookupname) + make_2(present,tfmdata,characters,v,name,code,k,done,lookuptags,lookupname) end end end @@ -473,6 +478,7 @@ local function preparesubstitutions(tfmdata,feature,value,validlookups,lookuplis local changed = tfmdata.changed local lookuphash = resources.lookuphash local lookuptypes = resources.lookuptypes + local lookuptags = resources.lookuptags local ligatures = { } local alternate = tonumber(value) or true and 1 @@ -489,7 +495,7 @@ local function preparesubstitutions(tfmdata,feature,value,validlookups,lookuplis for unicode, data in next, lookupdata do if lookuptype == "substitution" then if trace_singles then - report_substitution(feature,lookupname,descriptions,unicode,data) + report_substitution(feature,lookuptags,lookupname,descriptions,unicode,data) end changed[unicode] = data elseif lookuptype == "alternate" then @@ -497,28 +503,28 @@ local function preparesubstitutions(tfmdata,feature,value,validlookups,lookuplis if replacement then changed[unicode] = replacement if trace_alternatives then - report_alternate(feature,lookupname,descriptions,unicode,replacement,value,"normal") + 
report_alternate(feature,lookuptags,lookupname,descriptions,unicode,replacement,value,"normal") end elseif defaultalt == "first" then replacement = data[1] changed[unicode] = replacement if trace_alternatives then - report_alternate(feature,lookupname,descriptions,unicode,replacement,value,defaultalt) + report_alternate(feature,lookuptags,lookupname,descriptions,unicode,replacement,value,defaultalt) end elseif defaultalt == "last" then replacement = data[#data] if trace_alternatives then - report_alternate(feature,lookupname,descriptions,unicode,replacement,value,defaultalt) + report_alternate(feature,lookuptags,lookupname,descriptions,unicode,replacement,value,defaultalt) end else if trace_alternatives then - report_alternate(feature,lookupname,descriptions,unicode,replacement,value,"unknown") + report_alternate(feature,lookuptags,lookupname,descriptions,unicode,replacement,value,"unknown") end end elseif lookuptype == "ligature" then ligatures[#ligatures+1] = { unicode, data, lookupname } if trace_ligatures then - report_ligature(feature,lookupname,descriptions,unicode,data) + report_ligature(feature,lookuptags,lookupname,descriptions,unicode,data) end end end @@ -541,7 +547,7 @@ local function preparesubstitutions(tfmdata,feature,value,validlookups,lookuplis for i=1,nofligatures do local ligature = ligatures[i] local unicode, tree, lookupname = ligature[1], ligature[2], ligature[3] - make_2(present,tfmdata,characters,tree,"ctx_"..unicode,unicode,unicode,done,lookupname) + make_2(present,tfmdata,characters,tree,"ctx_"..unicode,unicode,unicode,done,lookuptags,lookupname) end end @@ -552,11 +558,11 @@ local function preparepositionings(tfmdata,feature,value,validlookups,lookuplist local characters = tfmdata.characters local descriptions = tfmdata.descriptions local resources = tfmdata.resources + local properties = tfmdata.properties local lookuphash = resources.lookuphash + local lookuptags = resources.lookuptags local traceindeed = trace_baseinit and trace_kerns 
- -- check out this sharedkerns trickery - for l=1,#lookuplist do local lookupname = lookuplist[l] local lookupdata = lookuphash[lookupname] @@ -571,7 +577,7 @@ local function preparepositionings(tfmdata,feature,value,validlookups,lookuplist for otherunicode, kern in next, data do if not kerns[otherunicode] and kern ~= 0 then kerns[otherunicode] = kern - report_kern(feature,lookup,descriptions,unicode,otherunicode,kern) + report_kern(feature,lookuptags,lookup,descriptions,unicode,otherunicode,kern) end end else diff --git a/tex/context/base/font-otf.lua b/tex/context/base/font-otf.lua index c1bb4419c..18b975215 100644 --- a/tex/context/base/font-otf.lua +++ b/tex/context/base/font-otf.lua @@ -24,7 +24,9 @@ local reversed, concat, remove, sortedkeys = table.reversed, table.concat, table local ioflush = io.flush local fastcopy, tohash, derivetable = table.fastcopy, table.tohash, table.derive local formatters = string.formatters +local P, R, S, C, Ct, lpegmatch = lpeg.P, lpeg.R, lpeg.S, lpeg.C, lpeg.Ct, lpeg.match +local setmetatableindex = table.setmetatableindex local allocate = utilities.storage.allocate local registertracker = trackers.register local registerdirective = directives.register @@ -33,13 +35,16 @@ local stoptiming = statistics.stoptiming local elapsedtime = statistics.elapsedtime local findbinfile = resolvers.findbinfile -local trace_private = false registertracker("otf.private", function(v) trace_private = v end) -local trace_loading = false registertracker("otf.loading", function(v) trace_loading = v end) -local trace_features = false registertracker("otf.features", function(v) trace_features = v end) -local trace_dynamics = false registertracker("otf.dynamics", function(v) trace_dynamics = v end) -local trace_sequences = false registertracker("otf.sequences", function(v) trace_sequences = v end) -local trace_markwidth = false registertracker("otf.markwidth", function(v) trace_markwidth = v end) -local trace_defining = false 
registertracker("fonts.defining", function(v) trace_defining = v end) +local trace_private = false registertracker("otf.private", function(v) trace_private = v end) +local trace_loading = false registertracker("otf.loading", function(v) trace_loading = v end) +local trace_features = false registertracker("otf.features", function(v) trace_features = v end) +local trace_dynamics = false registertracker("otf.dynamics", function(v) trace_dynamics = v end) +local trace_sequences = false registertracker("otf.sequences", function(v) trace_sequences = v end) +local trace_markwidth = false registertracker("otf.markwidth", function(v) trace_markwidth = v end) +local trace_defining = false registertracker("fonts.defining", function(v) trace_defining = v end) + +local compact_lookups = true registertracker("otf.compactlookups", function(v) compact_lookups = v end) +local purge_names = true registertracker("otf.purgenames", function(v) purge_names = v end) local report_otf = logs.reporter("fonts","otf loading") @@ -48,13 +53,17 @@ local otf = fonts.handlers.otf otf.glists = { "gsub", "gpos" } -otf.version = 2.762 -- beware: also sync font-mis.lua +otf.version = 2.802 -- beware: also sync font-mis.lua otf.cache = containers.define("fonts", "otf", otf.version, true) local fontdata = fonts.hashes.identifiers local chardata = characters and characters.data -- not used -local otffeatures = fonts.constructors.newfeatures("otf") +local definers = fonts.definers +local readers = fonts.readers +local constructors = fonts.constructors + +local otffeatures = constructors.newfeatures("otf") local registerotffeature = otffeatures.register local enhancers = allocate() @@ -62,13 +71,8 @@ otf.enhancers = enhancers local patches = { } enhancers.patches = patches -local definers = fonts.definers -local readers = fonts.readers -local constructors = fonts.constructors - local forceload = false local cleanup = 0 -- mk: 0=885M 1=765M 2=735M (regular run 730M) -local usemetatables = false -- .4 
slower on mk but 30 M less mem so we might change the default -- will be directive local packdata = true local syncspace = true local forcenotdef = false @@ -93,7 +97,6 @@ formats.dfont = "truetype" registerdirective("fonts.otf.loader.cleanup", function(v) cleanup = tonumber(v) or (v and 1) or 0 end) registerdirective("fonts.otf.loader.force", function(v) forceload = v end) -registerdirective("fonts.otf.loader.usemetatables", function(v) usemetatables = v end) registerdirective("fonts.otf.loader.pack", function(v) packdata = v end) registerdirective("fonts.otf.loader.syncspace", function(v) syncspace = v end) registerdirective("fonts.otf.loader.forcenotdef", function(v) forcenotdef = v end) @@ -280,6 +283,9 @@ local ordered_enhancers = { "add duplicates", "cleanup tables", + + "compact lookups", + "purge names", } --[[ldx-- @@ -495,7 +501,7 @@ function otf.load(filename,sub,featurefile) -- second argument (format) is gone }, helpers = { -- might go away tounicodelist = splitter, - tounicodetable = lpeg.Ct(splitter), + tounicodetable = Ct(splitter), }, } starttiming(data) @@ -538,6 +544,39 @@ function otf.load(filename,sub,featurefile) -- second argument (format) is gone report_otf("loading from cache using hash %a",hash) end enhance("unpack",data,filename,nil,false) + -- + local resources = data.resources + local lookuptags = resources.lookuptags + local unicodes = resources.unicodes + if not lookuptags then + lookuptags = { } + resources.lookuptags = lookuptags + end + setmetatableindex(lookuptags,function(t,k) + local v = type(k) == "number" and ("lookup " .. 
k) or k + t[k] = v + return v + end) + if not unicodes then + unicodes = { } + resources.unicodes = unicodes + setmetatableindex(unicodes,function(t,k) + -- use rawget when no table has to be built + setmetatableindex(unicodes,nil) + for u, d in next, data.descriptions do + local n = d.name + if n then + t[n] = u + -- report_otf("accessing known name %a",k) + else + -- report_otf("accessing unknown name %a",k) + end + end + return rawget(t,k) + end) + end + constructors.addcoreunicodes(unicodes) -- do we really need this? + -- if applyruntimefixes then applyruntimefixes(filename,data) end @@ -579,41 +618,29 @@ actions["add dimensions"] = function(data,filename) local defaultheight = resources.defaultheight or 0 local defaultdepth = resources.defaultdepth or 0 local basename = trace_markwidth and file.basename(filename) - if usemetatables then - for _, d in next, descriptions do - local wd = d.width - if not wd then - d.width = defaultwidth - elseif trace_markwidth and wd ~= 0 and d.class == "mark" then - report_otf("mark %a with width %b found in %a",d.name or "",wd,basename) - -- d.width = -wd - end - setmetatable(d,mt) + for _, d in next, descriptions do + local bb, wd = d.boundingbox, d.width + if not wd then + -- or bb? 
+ d.width = defaultwidth + elseif trace_markwidth and wd ~= 0 and d.class == "mark" then + report_otf("mark %a with width %b found in %a",d.name or "",wd,basename) + -- d.width = -wd end - else - for _, d in next, descriptions do - local bb, wd = d.boundingbox, d.width - if not wd then - d.width = defaultwidth - elseif trace_markwidth and wd ~= 0 and d.class == "mark" then - report_otf("mark %a with width %b found in %a",d.name or "",wd,basename) - -- d.width = -wd + -- if forcenotdef and not d.name then + -- d.name = ".notdef" + -- end + if bb then + local ht, dp = bb[4], -bb[2] + if ht == 0 or ht < 0 then + -- not set + else + d.height = ht end - -- if forcenotdef and not d.name then - -- d.name = ".notdef" - -- end - if bb then - local ht, dp = bb[4], -bb[2] - if ht == 0 or ht < 0 then - -- not set - else - d.height = ht - end - if dp == 0 or dp < 0 then - -- not set - else - d.depth = dp - end + if dp == 0 or dp < 0 then + -- not set + else + d.depth = dp end end end @@ -1301,9 +1328,9 @@ local function s_uncover(splitter,cache,cover) local uncovered = cache[cover] if not uncovered then uncovered = lpegmatch(splitter,cover) --- for i=1,#uncovered do --- uncovered[i] = { [uncovered[i]] = true } --- end + -- for i=1,#uncovered do + -- uncovered[i] = { [uncovered[i]] = true } + -- end cache[cover] = uncovered end return { uncovered } @@ -1317,9 +1344,14 @@ local function t_hashed(t,cache) local ti = t[i] local tih = cache[ti] if not tih then - tih = { } - for i=1,#ti do - tih[ti[i]] = true + local tn = #ti + if tn == 1 then + tih = { [ti[1]] = true } + else + tih = { } + for i=1,tn do + tih[ti[i]] = true + end end cache[ti] = tih end @@ -1335,12 +1367,17 @@ end local function s_hashed(t,cache) if t then - local ht = { } local tf = t[1] - for i=1,#tf do - ht[i] = { [tf[i]] = true } + local nf = #tf + if nf == 1 then + return { [tf[1]] = true } + else + local ht = { } + for i=1,nf do + ht[i] = { [tf[i]] = true } + end + return ht end - return ht else return nil end 
@@ -1791,7 +1828,7 @@ end -- future versions will remove _ -local valid = (lpeg.R("\x00\x7E") - lpeg.S("(){}[]<>%/ \n\r\f\v"))^0 * lpeg.P(-1) +local valid = (R("\x00\x7E") - S("(){}[]<>%/ \n\r\f\v"))^0 * P(-1) local function valid_ps_name(str) return str and str ~= "" and #str < 64 and lpegmatch(valid,str) and true or false @@ -1853,8 +1890,17 @@ actions["check metadata"] = function(data,filename,raw) end actions["cleanup tables"] = function(data,filename,raw) - data.resources.indices = nil -- not needed - data.helpers = nil + local duplicates = data.resources.duplicates + if duplicates then + for k, v in next, duplicates do + if #v == 1 then + duplicates[k] = v[1] + end + end + end + data.resources.indices = nil -- not needed + data.resources.unicodes = nil -- delayed + data.helpers = nil -- tricky as we have no unicodes any more end -- kern: ttf has a table with kerns @@ -1976,6 +2022,164 @@ actions["reorganize glyph anchors"] = function(data,filename,raw) -- when we rep end end +local bogusname = (P("uni") + P("u")) * R("AF","09")^4 + + (P("index") + P("glyph") + S("Ii") * P("dentity") * P(".")^0) * R("09")^1 +local uselessname = (1-bogusname)^0 * bogusname + +actions["purge names"] = function(data,filename,raw) -- not used yet + if purge_names then + local n = 0 + for u, d in next, data.descriptions do + if lpegmatch(uselessname,d.name) then + n = n + 1 + d.name = nil + end + -- d.comment = nil + end + if n > 0 then + report_otf("%s bogus names removed",n) + end + end +end + +actions["compact lookups"] = function(data,filename,raw) + if not compact_lookups then + report_otf("not compacting") + return + end + -- create keyhash + local last = 0 + local tags = table.setmetatableindex({ }, + function(t,k) + last = last + 1 + t[k] = last + return last + end + ) + -- + local descriptions = data.descriptions + local resources = data.resources + -- + for u, d in next, descriptions do + -- + -- -- we can also compact anchors and cursives (basechar basemark baselig mark) 
+ -- + local slookups = d.slookups + if type(slookups) == "table" then + local s = { } + for k, v in next, slookups do + s[tags[k]] = v + end + d.slookups = s + end + -- + local mlookups = d.mlookups + if type(mlookups) == "table" then + local m = { } + for k, v in next, mlookups do + m[tags[k]] = v + end + d.mlookups = m + end + -- + local kerns = d.kerns + if type(kerns) == "table" then + local t = { } + for k, v in next, kerns do + t[tags[k]] = v + end + d.kerns = t + end + end + -- + local lookups = data.lookups + if lookups then + local l = { } + for k, v in next, lookups do + local rules = v.rules + if rules then + for i=1,#rules do + local l = rules[i].lookups + if type(l) == "table" then + for i=1,#l do + l[i] = tags[l[i]] + end + end + end + end + l[tags[k]] = v + end + data.lookups = l + end + -- + local lookups = resources.lookups + if lookups then + local l = { } + for k, v in next, lookups do + local s = v.subtables + if type(s) == "table" then + for i=1,#s do + s[i] = tags[s[i]] + end + end + l[tags[k]] = v + end + resources.lookups = l + end + -- + local sequences = resources.sequences + if sequences then + for i=1,#sequences do + local s = sequences[i] + local n = s.name + if n then + s.name = tags[n] + end + local t = s.subtables + if type(t) == "table" then + for i=1,#t do + t[i] = tags[t[i]] + end + end + end + end + -- + local lookuptypes = resources.lookuptypes + if lookuptypes then + local l = { } + for k, v in next, lookuptypes do + l[tags[k]] = v + end + resources.lookuptypes = l + end + -- + local anchor_to_lookup = resources.anchor_to_lookup + if anchor_to_lookup then + for anchor, lookups in next, anchor_to_lookup do + local l = { } + for lookup, value in next, lookups do + l[tags[lookup]] = value + end + anchor_to_lookup[anchor] = l + end + end + -- + local lookup_to_anchor = resources.lookup_to_anchor + if lookup_to_anchor then + local l = { } + for lookup, value in next, lookup_to_anchor do + l[tags[lookup]] = value + end + 
resources.lookup_to_anchor = l + end + -- + tags = table.swapped(tags) + -- + report_otf("%s lookup tags compacted",#tags) + -- + resources.lookuptags = tags +end + -- modes: node, base, none function otf.setfeatures(tfmdata,features) @@ -2116,8 +2320,8 @@ local function copytotfm(data,cache_id) parameters.charwidth = charwidth parameters.charxheight = charxheight -- - local space = 0x0020 -- unicodes['space'], unicodes['emdash'] - local emdash = 0x2014 -- unicodes['space'], unicodes['emdash'] + local space = 0x0020 + local emdash = 0x2014 if monospaced then if descriptions[space] then spaceunits, spacer = descriptions[space].width, "space" @@ -2166,7 +2370,7 @@ local function copytotfm(data,cache_id) if charxheight then parameters.x_height = charxheight else - local x = 0x78 -- unicodes['x'] + local x = 0x0078 if x then local x = descriptions[x] if x then @@ -2204,7 +2408,6 @@ local function copytotfm(data,cache_id) end report_otf() end - -- return { characters = characters, descriptions = descriptions, @@ -2234,14 +2437,23 @@ local function otftotfm(specification) if duplicates then local nofduplicates, nofduplicated = 0, 0 for parent, list in next, duplicates do - for i=1,#list do - local unicode = list[i] - if not descriptions[unicode] then - descriptions[unicode] = descriptions[parent] -- or copy + if type(list) == "table" then + local n = #list + for i=1,n do + local unicode = list[i] + if not descriptions[unicode] then + descriptions[unicode] = descriptions[parent] -- or copy + nofduplicated = nofduplicated + 1 + end + end + nofduplicates = nofduplicates + n + else + if not descriptions[list] then + descriptions[list] = descriptions[parent] -- or copy nofduplicated = nofduplicated + 1 end + nofduplicates = nofduplicates + 1 end - nofduplicates = nofduplicates + #list end if trace_otf and nofduplicated ~= nofduplicates then report_otf("%i extra duplicates copied out of %i",nofduplicated,nofduplicates) diff --git a/tex/context/base/font-otn.lua 
b/tex/context/base/font-otn.lua index c35dcf27c..32dc820d3 100644 --- a/tex/context/base/font-otn.lua +++ b/tex/context/base/font-otn.lua @@ -269,6 +269,7 @@ local currentfont = false local lookuptable = false local anchorlookups = false local lookuptypes = false +local lookuptags = false local handlers = { } local rlmode = 0 local featurevalue = false @@ -323,20 +324,20 @@ end local function cref(kind,chainname,chainlookupname,lookupname,index) -- not in the mood to alias f_ if index then - return formatters["feature %a, chain %a, sub %a, lookup %a, index %a"](kind,chainname,chainlookupname,lookupname,index) + return formatters["feature %a, chain %a, sub %a, lookup %a, index %a"](kind,chainname,chainlookupname,lookuptags[lookupname],index) elseif lookupname then - return formatters["feature %a, chain %a, sub %a, lookup %a"](kind,chainname,chainlookupname,lookupname) + return formatters["feature %a, chain %a, sub %a, lookup %a"](kind,chainname,chainlookupname,lookuptags[lookupname]) elseif chainlookupname then - return formatters["feature %a, chain %a, sub %a"](kind,chainname,chainlookupname) + return formatters["feature %a, chain %a, sub %a"](kind,lookuptags[chainname],lookuptags[chainlookupname]) elseif chainname then - return formatters["feature %a, chain %a"](kind,chainname) + return formatters["feature %a, chain %a"](kind,lookuptags[chainname]) else return formatters["feature %a"](kind) end end local function pref(kind,lookupname) - return formatters["feature %a, lookup %a"](kind,lookupname) + return formatters["feature %a, lookup %a"](kind,lookuptags[lookupname]) end -- We can assume that languages that use marks are not hyphenated. 
We can also assume @@ -1924,7 +1925,7 @@ local function normal_handle_contextchain(head,start,kind,chainname,contexts,seq end else local i = 1 - repeat + while true do if skipped then while true do local char = getchar(start) @@ -1965,12 +1966,14 @@ local function normal_handle_contextchain(head,start,kind,chainname,contexts,seq end end end - if start then + if i > nofchainlookups then + break + elseif start then start = getnext(start) else -- weird end - until i > nofchainlookups + end end else local replacements = ck[7] @@ -2169,6 +2172,7 @@ local function featuresprocessor(head,font,attr) anchorlookups = resources.lookup_to_anchor lookuptable = resources.lookups lookuptypes = resources.lookuptypes + lookuptags = resources.lookuptags currentfont = font rlmode = 0 @@ -2769,6 +2773,7 @@ local function prepare_contextchains(tfmdata) local rawdata = tfmdata.shared.rawdata local resources = rawdata.resources local lookuphash = resources.lookuphash + local lookuptags = resources.lookuptags local lookups = rawdata.lookups if lookups then for lookupname, lookupdata in next, rawdata.lookups do @@ -2782,7 +2787,7 @@ local function prepare_contextchains(tfmdata) report_prepare("unsupported format %a",format) elseif not validformat[lookuptype] then -- todo: dejavu-serif has one (but i need to see what use it has) - report_prepare("unsupported format %a, lookuptype %a, lookupname %a",format,lookuptype,lookupname) + report_prepare("unsupported format %a, lookuptype %a, lookupname %a",format,lookuptype,lookuptags[lookupname]) else local contexts = lookuphash[lookupname] if not contexts then @@ -2838,7 +2843,7 @@ local function prepare_contextchains(tfmdata) -- no rules end else - report_prepare("missing lookuptype for lookupname %a",lookupname) + report_prepare("missing lookuptype for lookupname %a",lookuptags[lookupname]) end end end diff --git a/tex/context/base/font-otp.lua b/tex/context/base/font-otp.lua index 60eee0738..63e4184c1 100644 --- a/tex/context/base/font-otp.lua 
+++ b/tex/context/base/font-otp.lua @@ -140,6 +140,11 @@ end -- return b -- end +-- beware: we cannot unpack and repack the same table because then sharing +-- interferes (we could catch this if needed) .. so for now: save, reload +-- and repack in such cases (never needed anyway) .. a tricky aspect is that +-- we then need to sort more thanks to random hashing + local function packdata(data) if data then -- stripdata(data) @@ -898,3 +903,4 @@ if otf.enhancers.register then end otf.enhancers.unpack = unpackdata -- used elsewhere +otf.enhancers.pack = packdata -- used elsewhere diff --git a/tex/context/base/font-tfm.lua b/tex/context/base/font-tfm.lua index 827d70586..14c130d10 100644 --- a/tex/context/base/font-tfm.lua +++ b/tex/context/base/font-tfm.lua @@ -114,6 +114,11 @@ local function read_from_tfm(specification) features.encoding = encoding end end + -- let's play safe: + properties.haskerns = true + properties.haslogatures = true + resources.unicodes = { } + resources.lookuptags = { } -- return tfmdata end diff --git a/tex/context/base/grph-epd.lua b/tex/context/base/grph-epd.lua index 4f9d46097..8dcb58b3d 100644 --- a/tex/context/base/grph-epd.lua +++ b/tex/context/base/grph-epd.lua @@ -22,4 +22,7 @@ function figures.mergegoodies(optionlist) if all or options[variables.layer] then codeinjections.mergeviewerlayers() end + if all or options[variables.bookmark] then + codeinjections.mergebookmarks() + end end diff --git a/tex/context/base/grph-epd.mkiv b/tex/context/base/grph-epd.mkiv index 58526fd44..444fa55a6 100644 --- a/tex/context/base/grph-epd.mkiv +++ b/tex/context/base/grph-epd.mkiv @@ -32,7 +32,7 @@ \c!offset=\v!overlay, \c!background={\v!foreground,system:graphics:epdf}] -\def\grph_epdf_add_overlay +\unexpanded\def\grph_epdf_add_overlay {\global\setbox\foundexternalfigure\vbox\bgroup \system_graphics_epdf{\box\foundexternalfigure}% \egroup} diff --git a/tex/context/base/grph-inc.lua b/tex/context/base/grph-inc.lua index f83c759b3..28ef5d462 100644 
--- a/tex/context/base/grph-inc.lua +++ b/tex/context/base/grph-inc.lua @@ -38,6 +38,8 @@ The TeX-Lua mix is suboptimal. This has to do with the fact that we cannot run TeX code from within Lua. Some more functionality will move to Lua. ]]-- +-- todo: store loaded pages per pdf file someplace + local format, lower, find, match, gsub, gmatch = string.format, string.lower, string.find, string.match, string.gsub, string.gmatch local contains = table.contains local concat, insert, remove = table.concat, table.insert, table.remove @@ -67,6 +69,8 @@ local texsetbox = tex.setbox local hpack = node.hpack +local new_latelua = nodes.pool.latelua + local context = context local variables = interfaces.variables @@ -1172,6 +1176,13 @@ function checkers.generic(data) return data end +local nofimages = 0 +local pofimages = { } + +function figures.getrealpage(index) + return pofimages[index] or 0 +end + function includers.generic(data) local dr, du, ds = data.request, data.used, data.status -- here we set the 'natural dimensions' @@ -1195,7 +1206,18 @@ function includers.generic(data) if figure then local nr = figures.boxnumber -- it looks like we have a leak in attributes here .. 
todo - local box = hpack(images.node(figure)) -- images.node(figure) not longer valid + + nofimages = nofimages + 1 + ds.pageindex = nofimages + local image = images.node(figure) + local pager = new_latelua(function() + pofimages[nofimages] = pofimages[nofimages] or tex.count.realpageno -- so when reused we register the first one only + end) + image.next = pager + pager.prev = image + + local box = hpack(image) -- images.node(figure) not longer valid + indexed[figure.index] = figure box.width, box.height, box.depth = figure.width, figure.height, 0 -- new, hm, tricky, we need to do that in tex (yet) texsetbox(nr,box) diff --git a/tex/context/base/l-lpeg.lua b/tex/context/base/l-lpeg.lua index f3fd28b1d..3e620a6ca 100644 --- a/tex/context/base/l-lpeg.lua +++ b/tex/context/base/l-lpeg.lua @@ -145,6 +145,9 @@ patterns.utfbom_8 = utfbom_8 patterns.utf_16_be_nl = P("\000\r\000\n") + P("\000\r") + P("\000\n") -- P("\000\r") * (P("\000\n") + P(true)) + P("\000\n") patterns.utf_16_le_nl = P("\r\000\n\000") + P("\r\000") + P("\n\000") -- P("\r\000") * (P("\n\000") + P(true)) + P("\n\000") +patterns.utf_32_be_nl = P("\000\000\000\r\000\000\000\n") + P("\000\000\000\r") + P("\000\000\000\n") +patterns.utf_32_le_nl = P("\r\000\000\000\n\000\000\000") + P("\r\000\000\000") + P("\n\000\000\000") + patterns.utf8one = R("\000\127") patterns.utf8two = R("\194\223") * utf8next patterns.utf8three = R("\224\239") * utf8next * utf8next @@ -1014,3 +1017,75 @@ lpeg.patterns.stripzeros = stripper -- lpegmatch(stripper,str) -- print(#str, os.clock()-ts, lpegmatch(stripper,sample)) +-- for practical reasone we keep this here: + +local byte_to_HEX = { } +local byte_to_hex = { } +local byte_to_dec = { } -- for md5 +local hex_to_byte = { } + +for i=0,255 do + local H = format("%02X",i) + local h = format("%02x",i) + local d = format("%03i",i) + local c = char(i) + byte_to_HEX[c] = H + byte_to_hex[c] = h + byte_to_dec[c] = d + hex_to_byte[h] = c + hex_to_byte[H] = c +end + +local hextobyte = 
P(2)/hex_to_byte +local bytetoHEX = P(1)/byte_to_HEX +local bytetohex = P(1)/byte_to_hex +local bytetodec = P(1)/byte_to_dec +local hextobytes = Cs(hextobyte^0) +local bytestoHEX = Cs(bytetoHEX^0) +local bytestohex = Cs(bytetohex^0) +local bytestodec = Cs(bytetodec^0) + +patterns.hextobyte = hextobyte +patterns.bytetoHEX = bytetoHEX +patterns.bytetohex = bytetohex +patterns.bytetodec = bytetodec +patterns.hextobytes = hextobytes +patterns.bytestoHEX = bytestoHEX +patterns.bytestohex = bytestohex +patterns.bytestodec = bytestodec + +function string.toHEX(s) + if not s or s == "" then + return s + else + return lpegmatch(bytestoHEX,s) + end +end + +function string.tohex(s) + if not s or s == "" then + return s + else + return lpegmatch(bytestohex,s) + end +end + +function string.todec(s) + if not s or s == "" then + return s + else + return lpegmatch(bytestodec,s) + end +end + +function string.tobytes(s) + if not s or s == "" then + return s + else + return lpegmatch(hextobytes,s) + end +end + +-- local h = "ADFE0345" +-- local b = lpegmatch(patterns.hextobytes,h) +-- print(h,b,string.tohex(b),string.toHEX(b)) diff --git a/tex/context/base/l-md5.lua b/tex/context/base/l-md5.lua index 8ac20a5a5..00272c873 100644 --- a/tex/context/base/l-md5.lua +++ b/tex/context/base/l-md5.lua @@ -19,48 +19,38 @@ if not md5 then end local md5, file = md5, file -local gsub, format, byte = string.gsub, string.format, string.byte -local md5sum = md5.sum +local gsub = string.gsub -local function convert(str,fmt) - return (gsub(md5sum(str),".",function(chr) return format(fmt,byte(chr)) end)) -end - -if not md5.HEX then function md5.HEX(str) return convert(str,"%02X") end end -if not md5.hex then function md5.hex(str) return convert(str,"%02x") end end -if not md5.dec then function md5.dec(str) return convert(str,"%03i") end end - --- local P, Cs, lpegmatch = lpeg.P, lpeg.Cs,lpeg.match --- --- if not md5.HEX then --- local function remap(chr) return format("%02X",byte(chr)) end --- function 
md5.HEX(str) return (gsub(md5.sum(str),".",remap)) end --- end +-- local gsub, format, byte = string.gsub, string.format, string.byte -- --- if not md5.hex then --- local function remap(chr) return format("%02x",byte(chr)) end --- function md5.hex(str) return (gsub(md5.sum(str),".",remap)) end +-- local function convert(str,fmt) +-- return (gsub(md5sum(str),".",function(chr) return format(fmt,byte(chr)) end)) -- end -- --- if not md5.dec then --- local function remap(chr) return format("%03i",byte(chr)) end --- function md5.dec(str) return (gsub(md5.sum(str),".",remap)) end --- end +-- if not md5.HEX then function md5.HEX(str) return convert(str,"%02X") end end +-- if not md5.hex then function md5.hex(str) return convert(str,"%02x") end end +-- if not md5.dec then function md5.dec(str) return convert(str,"%03i") end end --- if not md5.HEX then --- local pattern_HEX = Cs( ( P(1) / function(chr) return format("%02X",byte(chr)) end)^0 ) --- function md5.HEX(str) return lpegmatch(pattern_HEX,md5.sum(str)) end --- end --- --- if not md5.hex then --- local pattern_hex = Cs( ( P(1) / function(chr) return format("%02x",byte(chr)) end)^0 ) --- function md5.hex(str) return lpegmatch(pattern_hex,md5.sum(str)) end --- end --- --- if not md5.dec then --- local pattern_dec = Cs( ( P(1) / function(chr) return format("%02i",byte(chr)) end)^0 ) --- function md5.dec(str) return lpegmatch(pattern_dec,md5.sum(str)) end --- end +do + + local patterns = lpeg and lpeg.patterns + + if patterns then + + local bytestoHEX = patterns.bytestoHEX + local bytestohex = patterns.bytestohex + local bytestodec = patterns.bytestodec + + local lpegmatch = lpeg.match + local md5sum = md5.sum + + if not md5.HEX then function md5.HEX(str) if str then return lpegmatch(bytestoHEX,md5sum(str)) end end end + if not md5.hex then function md5.hex(str) if str then return lpegmatch(bytestohex,md5sum(str)) end end end + if not md5.dec then function md5.dec(str) if str then return lpegmatch(bytestodec,md5sum(str)) 
end end end + + end + +end function file.needsupdating(oldname,newname,threshold) -- size modification access change local oldtime = lfs.attributes(oldname,"modification") diff --git a/tex/context/base/l-table.lua b/tex/context/base/l-table.lua index e642106cc..3eb8b8514 100644 --- a/tex/context/base/l-table.lua +++ b/tex/context/base/l-table.lua @@ -54,7 +54,7 @@ local function compare(a,b) if ta == tb then return a < b else - return tostring(a) < tostring(b) + return tostring(a) < tostring(b) -- not that efficient end end diff --git a/tex/context/base/l-unicode.lua b/tex/context/base/l-unicode.lua index d75779267..b3a4c35e6 100644 --- a/tex/context/base/l-unicode.lua +++ b/tex/context/base/l-unicode.lua @@ -56,7 +56,6 @@ local p_utfbom = patterns.utfbom local p_newline = patterns.newline local p_whitespace = patterns.whitespace - if not unicode then unicode = { utf = utf } -- for a while @@ -526,7 +525,8 @@ end -- end function utf.remapper(mapping,option) -- static also returns a pattern - if type(mapping) == "table" then + local variant = type(mapping) + if variant == "table" then if option == "dynamic" then local pattern = false table.setmetatablenewindex(mapping,function(t,k,v) rawset(t,k,v) pattern = false end) @@ -553,6 +553,19 @@ function utf.remapper(mapping,option) -- static also returns a pattern end end, pattern end + elseif variant == "function" then + if option == "pattern" then + return Cs((p_utf8char/mapping + p_utf8char)^0) + else + local pattern = Cs((p_utf8char/mapping + p_utf8char)^0) + return function(str) + if not str or str == "" then + return "" + else + return lpegmatch(pattern,str) + end + end, pattern + end else -- is actually an error return function(str) @@ -669,285 +682,359 @@ end local utf16_to_utf8_be, utf16_to_utf8_le local utf32_to_utf8_be, utf32_to_utf8_le -local utf_16_be_linesplitter = patterns.utfbom_16_be^-1 * lpeg.tsplitat(patterns.utf_16_be_nl) -local utf_16_le_linesplitter = patterns.utfbom_16_le^-1 * 
lpeg.tsplitat(patterns.utf_16_le_nl) +local utf_16_be_getbom = patterns.utfbom_16_be^-1 +local utf_16_le_getbom = patterns.utfbom_16_le^-1 +local utf_32_be_getbom = patterns.utfbom_32_be^-1 +local utf_32_le_getbom = patterns.utfbom_32_le^-1 + +local utf_16_be_linesplitter = utf_16_be_getbom * lpeg.tsplitat(patterns.utf_16_be_nl) +local utf_16_le_linesplitter = utf_16_le_getbom * lpeg.tsplitat(patterns.utf_16_le_nl) +local utf_32_be_linesplitter = utf_32_be_getbom * lpeg.tsplitat(patterns.utf_32_be_nl) +local utf_32_le_linesplitter = utf_32_le_getbom * lpeg.tsplitat(patterns.utf_32_le_nl) + +-- we have three possibilities: bytepairs (using tables), gmatch (using tables), gsub and +-- lpeg. Bytepairs are the fastert but as soon as we need to remove bombs and so the gain +-- is less due to more testing. Also, we seldom have to convert utf16 so we don't care to +-- much about a few milliseconds more runtime. The lpeg variant is upto 20% slower but +-- still pretty fast. +-- +-- for historic resone we keep the bytepairs variants around .. 
beware they don't grab the +-- bom like the lpegs do so they're not dropins in the functions that follow +-- +-- utf16_to_utf8_be = function(s) +-- if not s then +-- return nil +-- elseif s == "" then +-- return "" +-- end +-- local result, r, more = { }, 0, 0 +-- for left, right in bytepairs(s) do +-- if right then +-- local now = 256*left + right +-- if more > 0 then +-- now = (more-0xD800)*0x400 + (now-0xDC00) + 0x10000 -- the 0x10000 smells wrong +-- more = 0 +-- r = r + 1 +-- result[r] = utfchar(now) +-- elseif now >= 0xD800 and now <= 0xDBFF then +-- more = now +-- else +-- r = r + 1 +-- result[r] = utfchar(now) +-- end +-- end +-- end +-- return concat(result) +-- end +-- +-- utf16_to_utf8_be_t = function(t) +-- if not t then +-- return nil +-- elseif type(t) == "string" then +-- t = lpegmatch(utf_16_be_linesplitter,t) +-- end +-- local result = { } -- we reuse result +-- for i=1,#t do +-- local s = t[i] +-- if s ~= "" then +-- local r, more = 0, 0 +-- for left, right in bytepairs(s) do +-- if right then +-- local now = 256*left + right +-- if more > 0 then +-- now = (more-0xD800)*0x400 + (now-0xDC00) + 0x10000 -- the 0x10000 smells wrong +-- more = 0 +-- r = r + 1 +-- result[r] = utfchar(now) +-- elseif now >= 0xD800 and now <= 0xDBFF then +-- more = now +-- else +-- r = r + 1 +-- result[r] = utfchar(now) +-- end +-- end +-- end +-- t[i] = concat(result,"",1,r) -- we reused tmp, hence t +-- end +-- end +-- return t +-- end +-- +-- utf16_to_utf8_le = function(s) +-- if not s then +-- return nil +-- elseif s == "" then +-- return "" +-- end +-- local result, r, more = { }, 0, 0 +-- for left, right in bytepairs(s) do +-- if right then +-- local now = 256*right + left +-- if more > 0 then +-- now = (more-0xD800)*0x400 + (now-0xDC00) + 0x10000 -- the 0x10000 smells wrong +-- more = 0 +-- r = r + 1 +-- result[r] = utfchar(now) +-- elseif now >= 0xD800 and now <= 0xDBFF then +-- more = now +-- else +-- r = r + 1 +-- result[r] = utfchar(now) +-- end +-- end +-- end 
+-- return concat(result) +-- end +-- +-- utf16_to_utf8_le_t = function(t) +-- if not t then +-- return nil +-- elseif type(t) == "string" then +-- t = lpegmatch(utf_16_le_linesplitter,t) +-- end +-- local result = { } -- we reuse result +-- for i=1,#t do +-- local s = t[i] +-- if s ~= "" then +-- local r, more = 0, 0 +-- for left, right in bytepairs(s) do +-- if right then +-- local now = 256*right + left +-- if more > 0 then +-- now = (more-0xD800)*0x400 + (now-0xDC00) + 0x10000 -- the 0x10000 smells wrong +-- more = 0 +-- r = r + 1 +-- result[r] = utfchar(now) +-- elseif now >= 0xD800 and now <= 0xDBFF then +-- more = now +-- else +-- r = r + 1 +-- result[r] = utfchar(now) +-- end +-- end +-- end +-- t[i] = concat(result,"",1,r) -- we reused tmp, hence t +-- end +-- end +-- return t +-- end +-- +-- utf32_to_utf8_be_t = function(t) +-- if not t then +-- return nil +-- elseif type(t) == "string" then +-- t = lpegmatch(utflinesplitter,t) +-- end +-- local result = { } -- we reuse result +-- for i=1,#t do +-- local r, more = 0, -1 +-- for a,b in bytepairs(t[i]) do +-- if a and b then +-- if more < 0 then +-- more = 256*256*256*a + 256*256*b +-- else +-- r = r + 1 +-- result[t] = utfchar(more + 256*a + b) +-- more = -1 +-- end +-- else +-- break +-- end +-- end +-- t[i] = concat(result,"",1,r) +-- end +-- return t +-- end +-- +-- utf32_to_utf8_le_t = function(t) +-- if not t then +-- return nil +-- elseif type(t) == "string" then +-- t = lpegmatch(utflinesplitter,t) +-- end +-- local result = { } -- we reuse result +-- for i=1,#t do +-- local r, more = 0, -1 +-- for a,b in bytepairs(t[i]) do +-- if a and b then +-- if more < 0 then +-- more = 256*b + a +-- else +-- r = r + 1 +-- result[t] = utfchar(more + 256*256*256*b + 256*256*a) +-- more = -1 +-- end +-- else +-- break +-- end +-- end +-- t[i] = concat(result,"",1,r) +-- end +-- return t +-- end --- we have three possibilities: +local more = 0 + +local p_utf16_to_utf8_be = C(1) * C(1) /function(left,right) + local 
now = 256*byte(left) + byte(right) + if more > 0 then + now = (more-0xD800)*0x400 + (now-0xDC00) + 0x10000 -- the 0x10000 smells wrong + more = 0 + return utfchar(now) + elseif now >= 0xD800 and now <= 0xDBFF then + more = now + -- return "" + else + return utfchar(now) + end +end + +local p_utf16_to_utf8_le = C(1) * C(1) /function(right,left) + local now = 256*byte(left) + byte(right) + if more > 0 then + now = (more-0xD800)*0x400 + (now-0xDC00) + 0x10000 -- the 0x10000 smells wrong + more = 0 + return utfchar(now) + elseif now >= 0xD800 and now <= 0xDBFF then + more = now + -- return "" + else + return utfchar(now) + end +end +local p_utf32_to_utf8_be = C(1) * C(1) * C(1) * C(1) /function(a,b,c,d) + return utfchar(256*256*256*byte(a) + 256*256*byte(b) + 256*byte(c) + byte(d)) +end --- bytepairs: 0.048 --- gmatch : 0.069 --- lpeg : 0.089 (match time captures) +local p_utf32_to_utf8_le = C(1) * C(1) * C(1) * C(1) /function(a,b,c,d) + return utfchar(256*256*256*byte(d) + 256*256*byte(c) + 256*byte(b) + byte(a)) +end -if bytepairs then +p_utf16_to_utf8_be = P(true) / function() more = 0 end * utf_16_be_getbom * Cs(p_utf16_to_utf8_be^0) +p_utf16_to_utf8_le = P(true) / function() more = 0 end * utf_16_le_getbom * Cs(p_utf16_to_utf8_le^0) +p_utf32_to_utf8_be = P(true) / function() more = 0 end * utf_32_be_getbom * Cs(p_utf32_to_utf8_be^0) +p_utf32_to_utf8_le = P(true) / function() more = 0 end * utf_32_le_getbom * Cs(p_utf32_to_utf8_le^0) - -- with a little bit more code we could include the linesplitter +patterns.utf16_to_utf8_be = p_utf16_to_utf8_be +patterns.utf16_to_utf8_le = p_utf16_to_utf8_le +patterns.utf32_to_utf8_be = p_utf32_to_utf8_be +patterns.utf32_to_utf8_le = p_utf32_to_utf8_le - utf16_to_utf8_be = function(t) - if type(t) == "string" then - t = lpegmatch(utf_16_be_linesplitter,t) - end - local result = { } -- we reuse result - for i=1,#t do - local r, more = 0, 0 - for left, right in bytepairs(t[i]) do - if right then - local now = 256*left + right - if 
more > 0 then - now = (more-0xD800)*0x400 + (now-0xDC00) + 0x10000 -- the 0x10000 smells wrong - more = 0 - r = r + 1 - result[r] = utfchar(now) - elseif now >= 0xD800 and now <= 0xDBFF then - more = now - else - r = r + 1 - result[r] = utfchar(now) - end - end - end - t[i] = concat(result,"",1,r) -- we reused tmp, hence t - end - return t +utf16_to_utf8_be = function(s) + if s and s ~= "" then + return lpegmatch(p_utf16_to_utf8_be,s) + else + return s end +end - utf16_to_utf8_le = function(t) - if type(t) == "string" then - t = lpegmatch(utf_16_le_linesplitter,t) - end - local result = { } -- we reuse result - for i=1,#t do - local r, more = 0, 0 - for left, right in bytepairs(t[i]) do - if right then - local now = 256*right + left - if more > 0 then - now = (more-0xD800)*0x400 + (now-0xDC00) + 0x10000 -- the 0x10000 smells wrong - more = 0 - r = r + 1 - result[r] = utfchar(now) - elseif now >= 0xD800 and now <= 0xDBFF then - more = now - else - r = r + 1 - result[r] = utfchar(now) - end - end - end - t[i] = concat(result,"",1,r) -- we reused tmp, hence t +utf16_to_utf8_be_t = function(t) + if not t then + return nil + elseif type(t) == "string" then + t = lpegmatch(utf_16_be_linesplitter,t) + end + for i=1,#t do + local s = t[i] + if s ~= "" then + t[i] = lpegmatch(p_utf16_to_utf8_be,s) end - return t end + return t +end - utf32_to_utf8_be = function(t) - if type(t) == "string" then - t = lpegmatch(utflinesplitter,t) - end - local result = { } -- we reuse result - for i=1,#t do - local r, more = 0, -1 - for a,b in bytepairs(t[i]) do - if a and b then - if more < 0 then - more = 256*256*256*a + 256*256*b - else - r = r + 1 - result[t] = utfchar(more + 256*a + b) - more = -1 - end - else - break - end - end - t[i] = concat(result,"",1,r) - end - return t +utf16_to_utf8_le = function(s) + if s and s ~= "" then + return lpegmatch(p_utf16_to_utf8_le,s) + else + return s end +end - utf32_to_utf8_le = function(t) - if type(t) == "string" then - t = 
lpegmatch(utflinesplitter,t) - end - local result = { } -- we reuse result - for i=1,#t do - local r, more = 0, -1 - for a,b in bytepairs(t[i]) do - if a and b then - if more < 0 then - more = 256*b + a - else - r = r + 1 - result[t] = utfchar(more + 256*256*256*b + 256*256*a) - more = -1 - end - else - break - end - end - t[i] = concat(result,"",1,r) +utf16_to_utf8_le_t = function(t) + if not t then + return nil + elseif type(t) == "string" then + t = lpegmatch(utf_16_le_linesplitter,t) + end + for i=1,#t do + local s = t[i] + if s ~= "" then + t[i] = lpegmatch(p_utf16_to_utf8_le,s) end - return t end + return t +end -else - - utf16_to_utf8_be = function(t) - if type(t) == "string" then - t = lpegmatch(utf_16_be_linesplitter,t) - end - local result = { } -- we reuse result - for i=1,#t do - local r, more = 0, 0 - for left, right in gmatch(t[i],"(.)(.)") do - if left == "\000" then -- experiment - r = r + 1 - result[r] = utfchar(byte(right)) - elseif right then - local now = 256*byte(left) + byte(right) - if more > 0 then - now = (more-0xD800)*0x400 + (now-0xDC00) + 0x10000 -- the 0x10000 smells wrong - more = 0 - r = r + 1 - result[r] = utfchar(now) - elseif now >= 0xD800 and now <= 0xDBFF then - more = now - else - r = r + 1 - result[r] = utfchar(now) - end - end - end - t[i] = concat(result,"",1,r) -- we reused tmp, hence t - end - return t +utf32_to_utf8_be = function(s) + if s and s ~= "" then + return lpegmatch(p_utf32_to_utf8_be,s) + else + return s end +end - utf16_to_utf8_le = function(t) - if type(t) == "string" then - t = lpegmatch(utf_16_le_linesplitter,t) - end - local result = { } -- we reuse result - for i=1,#t do - local r, more = 0, 0 - for left, right in gmatch(t[i],"(.)(.)") do - if right == "\000" then - r = r + 1 - result[r] = utfchar(byte(left)) - elseif right then - local now = 256*byte(right) + byte(left) - if more > 0 then - now = (more-0xD800)*0x400 + (now-0xDC00) + 0x10000 -- the 0x10000 smells wrong - more = 0 - r = r + 1 - result[r] = 
utfchar(now) - elseif now >= 0xD800 and now <= 0xDBFF then - more = now - else - r = r + 1 - result[r] = utfchar(now) - end - end - end - t[i] = concat(result,"",1,r) -- we reused tmp, hence t +utf32_to_utf8_be_t = function(t) + if not t then + return nil + elseif type(t) == "string" then + t = lpegmatch(utf_32_be_linesplitter,t) + end + for i=1,#t do + local s = t[i] + if s ~= "" then + t[i] = lpegmatch(p_utf32_to_utf8_be,s) end - return t end + return t +end - utf32_to_utf8_le = function() return { } end -- never used anyway - utf32_to_utf8_be = function() return { } end -- never used anyway - - -- the next one is slighty slower - - -- local result, lines, r, more = { }, { }, 0, 0 - -- - -- local simple = Cmt( - -- C(1) * C(1), function(str,p,left,right) - -- local now = 256*byte(left) + byte(right) - -- if more > 0 then - -- now = (more-0xD800)*0x400 + (now-0xDC00) + 0x10000 -- the 0x10000 smells wrong - -- more = 0 - -- r = r + 1 - -- result[r] = utfchar(now) - -- elseif now >= 0xD800 and now <= 0xDBFF then - -- more = now - -- else - -- r = r + 1 - -- result[r] = utfchar(now) - -- end - -- return p - -- end - -- ) - -- - -- local complex = Cmt( - -- C(1) * C(1), function(str,p,left,right) - -- local now = 256*byte(left) + byte(right) - -- if more > 0 then - -- now = (more-0xD800)*0x400 + (now-0xDC00) + 0x10000 -- the 0x10000 smells wrong - -- more = 0 - -- r = r + 1 - -- result[r] = utfchar(now) - -- elseif now >= 0xD800 and now <= 0xDBFF then - -- more = now - -- else - -- r = r + 1 - -- result[r] = utfchar(now) - -- end - -- return p - -- end - -- ) - -- - -- local lineend = Cmt ( - -- patterns.utf_16_be_nl, function(str,p) - -- lines[#lines+1] = concat(result,"",1,r) - -- r, more = 0, 0 - -- return p - -- end - -- ) - -- - -- local be_1 = patterns.utfbom_16_be^-1 * (simple + complex)^0 - -- local be_2 = patterns.utfbom_16_be^-1 * (lineend + simple + complex)^0 - -- - -- utf16_to_utf8_be = function(t) - -- if type(t) == "string" then - -- local s = t - -- 
lines, r, more = { }, 0, 0 - -- lpegmatch(be_2,s) - -- if r > 0 then - -- lines[#lines+1] = concat(result,"",1,r) - -- end - -- result = { } - -- return lines - -- else - -- for i=1,#t do - -- r, more = 0, 0 - -- lpegmatch(be_1,t[i]) - -- t[i] = concat(result,"",1,r) - -- end - -- result = { } - -- return t - -- end - -- end +utf32_to_utf8_le = function(s) + if s and s ~= "" then + return lpegmatch(p_utf32_to_utf8_le,s) + else + return s + end +end +utf32_to_utf8_le_t = function(t) + if not t then + return nil + elseif type(t) == "string" then + t = lpegmatch(utf_32_le_linesplitter,t) + end + for i=1,#t do + local s = t[i] + if s ~= "" then + t[i] = lpegmatch(p_utf32_to_utf8_le,s) + end + end + return t end -utf.utf16_to_utf8_le = utf16_to_utf8_le -utf.utf16_to_utf8_be = utf16_to_utf8_be -utf.utf32_to_utf8_le = utf32_to_utf8_le -utf.utf32_to_utf8_be = utf32_to_utf8_be +utf.utf16_to_utf8_le_t = utf16_to_utf8_le_t +utf.utf16_to_utf8_be_t = utf16_to_utf8_be_t +utf.utf32_to_utf8_le_t = utf32_to_utf8_le_t +utf.utf32_to_utf8_be_t = utf32_to_utf8_be_t -function utf.utf8_to_utf8(t) +utf.utf16_to_utf8_le = utf16_to_utf8_le +utf.utf16_to_utf8_be = utf16_to_utf8_be +utf.utf32_to_utf8_le = utf32_to_utf8_le +utf.utf32_to_utf8_be = utf32_to_utf8_be + +function utf.utf8_to_utf8_t(t) return type(t) == "string" and lpegmatch(utflinesplitter,t) or t end -function utf.utf16_to_utf8(t,endian) - return endian and utf16_to_utf8_be(t) or utf16_to_utf8_le(t) or t +function utf.utf16_to_utf8_t(t,endian) + return endian and utf16_to_utf8_be_t(t) or utf16_to_utf8_le_t(t) or t end -function utf.utf32_to_utf8(t,endian) - return endian and utf32_to_utf8_be(t) or utf32_to_utf8_le(t) or t +function utf.utf32_to_utf8_t(t,endian) + return endian and utf32_to_utf8_be_t(t) or utf32_to_utf8_le_t(t) or t end -local function little(c) - local b = byte(c) +local function little(b) if b < 0x10000 then return char(b%256,b/256) else @@ -957,8 +1044,7 @@ local function little(c) end end -local function 
big(c) - local b = byte(c) +local function big(b) if b < 0x10000 then return char(b/256,b%256) else @@ -968,18 +1054,10 @@ local function big(c) end end --- function utf.utf8_to_utf16(str,littleendian) --- if littleendian then --- return char(255,254) .. utfgsub(str,".",little) --- else --- return char(254,255) .. utfgsub(str,".",big) --- end --- end - -local l_remap = utf.remapper(little,"pattern") -local b_remap = utf.remapper(big,"pattern") +local l_remap = Cs((p_utf8byte/little+P(1)/"")^0) +local b_remap = Cs((p_utf8byte/big +P(1)/"")^0) -function utf.utf8_to_utf16_be(str,nobom) +local function utf8_to_utf16_be(str,nobom) if nobom then return lpegmatch(b_remap,str) else @@ -987,7 +1065,7 @@ function utf.utf8_to_utf16_be(str,nobom) end end -function utf.utf8_to_utf16_le(str,nobom) +local function utf8_to_utf16_le(str,nobom) if nobom then return lpegmatch(l_remap,str) else @@ -995,11 +1073,14 @@ function utf.utf8_to_utf16_le(str,nobom) end end +utf.utf8_to_utf16_be = utf8_to_utf16_be +utf.utf8_to_utf16_le = utf8_to_utf16_le + function utf.utf8_to_utf16(str,littleendian,nobom) if littleendian then - return utf.utf8_to_utf16_le(str,nobom) + return utf8_to_utf16_le(str,nobom) else - return utf.utf8_to_utf16_be(str,nobom) + return utf8_to_utf16_be(str,nobom) end end @@ -1030,16 +1111,16 @@ function utf.xstring(s) end function utf.toeight(str) - if not str then + if not str or str == "" then return nil end local utftype = lpegmatch(p_utfstricttype,str) if utftype == "utf-8" then - return sub(str,4) - elseif utftype == "utf-16-le" then - return utf16_to_utf8_le(str) + return sub(str,4) -- remove the bom elseif utftype == "utf-16-be" then - return utf16_to_utf8_ne(str) + return utf16_to_utf8_be(str) -- bom gets removed + elseif utftype == "utf-16-le" then + return utf16_to_utf8_le(str) -- bom gets removed else return str end diff --git a/tex/context/base/lang-def.mkiv b/tex/context/base/lang-def.mkiv index 5c1d6de9c..088f86eb8 100644 --- a/tex/context/base/lang-def.mkiv 
+++ b/tex/context/base/lang-def.mkiv @@ -510,7 +510,8 @@ \c!rightquote=’, \c!leftquotation=“, \c!rightquotation=”, - \c!date={서기,\v!year,년,\v!month,월,\v!day,일}] + % \c!date={서기,\space,\v!year,\labeltext{\v!year},\space,\v!month,\labeltext{\v!month},\space,\v!day,\labeltext{\v!day}}] + \c!date={\v!year,\labeltext{\v!year},\space,\v!month,\labeltext{\v!month},\space,\v!day,\labeltext{\v!day}}] % Greek diff --git a/tex/context/base/lang-txt.lua b/tex/context/base/lang-txt.lua index 4f9f242e6..0cfb5bcea 100644 --- a/tex/context/base/lang-txt.lua +++ b/tex/context/base/lang-txt.lua @@ -415,6 +415,27 @@ data.labels={ }, }, texts={ + ["year"]={ + labels={ + en="year", + nl="jaar", + kr="년", + }, + }, + ["month"]={ + labels={ + en="month", + nl="maand", + kr="월", + }, + }, + ["day"]={ + labels={ + en="day", + nl="dag", + kr="일", + }, + }, ["and"]={ labels={ af="", @@ -506,7 +527,7 @@ data.labels={ hu="április", it="aprile", ja="4", - kr="4월", + kr="4", la="Aprilis", lt="balandžio", nb="april", @@ -613,7 +634,7 @@ data.labels={ hu="augusztus", it="agosto", ja="8", - kr="8월", + kr="8", la="Augustus", lt="rugpjūčio", nb="august", @@ -756,7 +777,7 @@ data.labels={ hu="december", it="dicembre", ja="12", - kr="12월", + kr="12", la="December", lt="gruodžio", nb="desember", @@ -828,7 +849,7 @@ data.labels={ hu="február", it="febbraio", ja="2", - kr="2월", + kr="2", la="Februarius", lt="vasario", nb="februar", @@ -898,7 +919,7 @@ data.labels={ hu=",. ábra:", it="Fig. 
", ja="図", - kr="그림", + kr="그림 ", la="Imago ", lt=", pav.", nb="Figur ", @@ -1053,6 +1074,7 @@ data.labels={ hr="vidi ispod", hu="lásd lejjebb", it="come mostrato sotto", + kr="이후로", la="", lt="kaip parodyta žemiau", nb="som vist under", @@ -1128,7 +1150,7 @@ data.labels={ hu="január", it="gennaio", ja="1", - kr="1월", + kr="1", la="Ianuarius", lt="sausio", nb="januar", @@ -1201,7 +1223,7 @@ data.labels={ hu="július", it="luglio", ja="7", - kr="7월", + kr="7", la="Iulius", lt="liepos", nb="juli", @@ -1273,7 +1295,7 @@ data.labels={ hu="június", it="giugno", ja="6", - kr="6월", + kr="6", la="Iunius", lt="birželio", nb="juni", @@ -1418,7 +1440,7 @@ data.labels={ hu="március", it="marzo", ja="3", - kr="3월", + kr="3", la="Martius", lt="kovo", nb="mars", @@ -1491,7 +1513,7 @@ data.labels={ hu="május", it="maggio", ja="5", - kr="5월", + kr="5", la="Maius", lt="gegužės", nb="mai", @@ -1600,7 +1622,7 @@ data.labels={ hu="november", it="novembre", ja="11", - kr="11월", + kr="11", la="November", lt="lapkričio", nb="november", @@ -1671,7 +1693,7 @@ data.labels={ hu="október", it="ottobre", ja="10", - kr="10월", + kr="10", la="October", lt="spalio", nb="oktober", @@ -1925,7 +1947,7 @@ data.labels={ hu="szeptember", it="settembre", ja="9", - kr="9월", + kr="9", la="September", lt="rugsėjo", nb="september", @@ -2143,7 +2165,7 @@ data.labels={ hu=",. 
táblázat:", it="Tabella ", ja="表", - kr="표", + kr="표 ", la="Tabula ", lt=", lentelė.", nb="Tabell ", @@ -2367,7 +2389,7 @@ data.labels={ hu="Ábrák", it="Figure", ja="図", - kr="그림", + kr="그림 ", la="Imagines", lt="Iliustracijos", nb="Figurer", @@ -2404,7 +2426,7 @@ data.labels={ hu="Grafikák", it="Grafici", ja="グラフ", - kr="그래픽", + kr="그래픽 ", la="Typi", lt="Graphics", nb="Bilde", @@ -2441,7 +2463,7 @@ data.labels={ hu="Index", it="Indice", ja="目次", - kr="색인", + kr="찾아보기", la="Indices", lt="Rodyklė", nb="Register", @@ -2549,6 +2571,7 @@ data.labels={ hr="Literatura", hu="Bibliográfia", it="Bibliografia", + kr="참고문헌", la="", lt="Literatūra", nb="", @@ -2585,7 +2608,7 @@ data.labels={ hu="Táblázatok", it="Tabelle", ja="机", - kr="표", + kr="표 ", la="Tabulae", lt="Lentelės", nb="Tabeller", diff --git a/tex/context/base/lang-url.mkiv b/tex/context/base/lang-url.mkiv index 8990dccd8..fd3bd3b0d 100644 --- a/tex/context/base/lang-url.mkiv +++ b/tex/context/base/lang-url.mkiv @@ -138,3 +138,31 @@ % \dorecurse{100}{\test{a} \test{ab} \test{abc} \test{abcd} \test{abcde} \test{abcdef}} \protect \endinput + +% \setuppapersize[A7] +% +% \unexpanded\def\WhateverA#1% +% {\dontleavehmode +% \begingroup +% \prehyphenchar"B7\relax +% \setbox\scratchbox\hbox{\tttf#1}% +% \prehyphenchar`-\relax +% \unhbox\scratchbox +% \endgroup} +% +% \unexpanded\def\WhateverB#1% +% {\dontleavehmode +% \begingroup +% \tttf +% \prehyphenchar\minusone +% % \localrightbox{\llap{_}}% +% \localrightbox{\llap{\smash{\lower1.5ex\hbox{\char"2192}}}}% +% \setbox\scratchbox\hbox{#1}% +% \prehyphenchar`-\relax +% \unhbox\scratchbox +% \endgroup} +% +% \begingroup \hsize1cm +% \WhateverA{thisisaboringandverylongcommand}\par +% \WhateverB{thisisaboringandverylongcommand}\par +% \endgroup diff --git a/tex/context/base/lpdf-ano.lua b/tex/context/base/lpdf-ano.lua index 14359e2c7..ab78ec0a1 100644 --- a/tex/context/base/lpdf-ano.lua +++ b/tex/context/base/lpdf-ano.lua @@ -1032,52 +1032,103 @@ function specials.action(var) 
end local function build(levels,start,parent,method) - local startlevel = levels[start][1] + local startlevel = levels[start].level local i, n = start, 0 local child, entry, m, prev, first, last, f, l while i and i <= #levels do - local li = levels[i] - local level, title, reference, open = li[1], li[2], li[3], li[4] - if level < startlevel then - pdfflushobject(child,entry) - return i, n, first, last - elseif level == startlevel then - if trace_bookmarks then - report_bookmark("%3i %w%s %s",reference.realpage,(level-1)*2,(open and "+") or "-",title) - end - local prev = child - child = pdfreserveobject() - if entry then - entry.Next = child and pdfreference(child) - pdfflushobject(prev,entry) - end - entry = pdfdictionary { - Title = pdfunicode(title), - Parent = parent, - Prev = prev and pdfreference(prev), - A = somedestination(reference.internal,reference.internal,reference.realpage), - } - -- entry.Dest = somedestination(reference.internal,reference.internal,reference.realpage) - if not first then first, last = child, child end - prev = child - last = prev - n = n + 1 + local current = levels[i] + if current.usedpage == false then + -- safeguard i = i + 1 - elseif i < #levels and level > startlevel then - i, m, f, l = build(levels,i,pdfreference(child),method) - entry.Count = (open and m) or -m - if m > 0 then - entry.First, entry.Last = pdfreference(f), pdfreference(l) - end else - -- missing intermediate level but ok - i, m, f, l = build(levels,i,pdfreference(child),method) - entry.Count = (open and m) or -m - if m > 0 then - entry.First, entry.Last = pdfreference(f), pdfreference(l) + local level = current.level + local title = current.title + local reference = current.reference + local opened = current.opened + local reftype = type(reference) + local variant = "unknown" + if reftype == "table" then + -- we're okay + variant = "list" + elseif reftype == "string" then + local resolved = references.identify("",reference) + local realpage = resolved and 
structures.references.setreferencerealpage(resolved) or 0 + if realpage > 0 then + variant = "realpage" + realpage = realpage + end + elseif reftype == "number" then + if reference > 0 then + variant = "realpage" + realpage = reference + end + else + -- error + end + if variant == "unknown" then + -- error, ignore + i = i + 1 + elseif level < startlevel then + if entry then + pdfflushobject(child,entry) + else + -- some error + end + return i, n, first, last + elseif level == startlevel then + if trace_bookmarks then + report_bookmark("%3i %w%s %s",reference.realpage,(level-1)*2,(opened and "+") or "-",title) + end + local prev = child + child = pdfreserveobject() + if entry then + entry.Next = child and pdfreference(child) + pdfflushobject(prev,entry) + end + local action = nil + if variant == "list" then + action = somedestination(reference.internal,reference.internal,reference.realpage) + elseif variant == "realpage" then + action = pagereferences[realpage] + end + entry = pdfdictionary { + Title = pdfunicode(title), + Parent = parent, + Prev = prev and pdfreference(prev), + A = action, + } + -- entry.Dest = somedestination(reference.internal,reference.internal,reference.realpage) + if not first then first, last = child, child end + prev = child + last = prev + n = n + 1 + i = i + 1 + elseif i < #levels and level > startlevel then + i, m, f, l = build(levels,i,pdfreference(child),method) + if entry then + entry.Count = (opened and m) or -m + if m > 0 then + entry.First = pdfreference(f) + entry.Last = pdfreference(l) + end + else + -- some error + end + else + -- missing intermediate level but ok + i, m, f, l = build(levels,i,pdfreference(child),method) + if entry then + entry.Count = (opened and m) or -m + if m > 0 then + entry.First = pdfreference(f) + entry.Last = pdfreference(l) + end + pdfflushobject(child,entry) + else + -- some error + end + return i, n, first, last end - pdfflushobject(child,entry) - return i, n, first, last end end 
pdfflushobject(child,entry) @@ -1085,8 +1136,7 @@ local function build(levels,start,parent,method) end function codeinjections.addbookmarks(levels,method) - if #levels > 0 then - structures.bookmarks.flatten(levels) -- dirty trick for lack of structure + if levels and #levels > 0 then local parent = pdfreserveobject() local _, m, first, last = build(levels,1,pdfreference(parent),method or "internal") local dict = pdfdictionary { diff --git a/tex/context/base/lpdf-epa.lua b/tex/context/base/lpdf-epa.lua index fd4d9eb7e..8ca568b76 100644 --- a/tex/context/base/lpdf-epa.lua +++ b/tex/context/base/lpdf-epa.lua @@ -10,30 +10,40 @@ if not modules then modules = { } end modules ['lpdf-epa'] = { -- change. local type, tonumber = type, tonumber -local format, gsub = string.format, string.gsub +local format, gsub, lower = string.format, string.gsub, string.lower local formatters = string.formatters ----- lpegmatch, lpegpatterns = lpeg.match, lpeg.patterns -local trace_links = false trackers.register("figures.links", function(v) trace_links = v end) -local report_link = logs.reporter("backend","merging") +local trace_links = false trackers.register("figures.links", function(v) trace_links = v end) +local trace_outlines = false trackers.register("figures.outliness", function(v) trace_outlines = v end) + +local report_link = logs.reporter("backend","link") +local report_outline = logs.reporter("backend","outline") local backends = backends local lpdf = lpdf local context = context +local nameonly = file.nameonly + local variables = interfaces.variables local codeinjections = backends.pdf.codeinjections ----- urlescaper = lpegpatterns.urlescaper ----- utftohigh = lpegpatterns.utftohigh local escapetex = characters.filters.utf.private.escape +local bookmarks = structures.bookmarks + local layerspec = { -- predefining saves time "epdflinks" } +local f_namespace = formatters["lpdf-epa-%s-"] + local function makenamespace(filename) - return 
format("lpdf-epa-%s-",file.removesuffix(file.basename(filename))) + filename = gsub(lower(nameonly(filename)),"[^%a%d]+","-") + return f_namespace(filename) end local function add_link(x,y,w,h,destination,what) @@ -71,7 +81,7 @@ local function link_goto(x,y,w,h,document,annotation,pagedata,namespace) if type(destination) == "string" then local destinations = document.destinations local wanted = destinations[destination] - destination = wanted and wanted.D + destination = wanted and wanted.D -- is this ok? isn't it destination already a string? if destination then what = "named" end end local pagedata = destination and destination[1] @@ -131,7 +141,7 @@ function codeinjections.mergereferences(specification) end if specification then local fullname = specification.fullname - local document = lpdf.epdf.load(fullname) + local document = lpdf.epdf.load(fullname) -- costs time if document then local pagenumber = specification.page or 1 local xscale = specification.yscale or 1 @@ -139,22 +149,31 @@ function codeinjections.mergereferences(specification) local size = specification.size or "crop" -- todo local pagedata = document.pages[pagenumber] local annotations = pagedata and pagedata.Annots + local namespace = makenamespace(fullname) + local reference = namespace .. pagenumber if annotations and annotations.n > 0 then - local namespace = format("lpdf-epa-%s-",file.removesuffix(file.basename(fullname))) - local reference = namespace .. 
pagenumber - local mediabox = pagedata.MediaBox - local llx, lly, urx, ury = mediabox[1], mediabox[2], mediabox[3], mediabox[4] - local width, height = xscale * (urx - llx), yscale * (ury - lly) -- \\overlaywidth, \\overlayheight + local mediabox = pagedata.MediaBox + local llx = mediabox[1] + local lly = mediabox[2] + local urx = mediabox[3] + local ury = mediabox[4] + local width = xscale * (urx - llx) -- \\overlaywidth, \\overlayheight + local height = yscale * (ury - lly) -- \\overlaywidth, \\overlayheight context.definelayer( { "epdflinks" }, { height = height.."bp" , width = width.."bp" }) for i=1,annotations.n do local annotation = annotations[i] if annotation then - local subtype = annotation.Subtype + local subtype = annotation.Subtype local rectangle = annotation.Rect - local a_llx, a_lly, a_urx, a_ury = rectangle[1], rectangle[2], rectangle[3], rectangle[4] - local x, y = xscale * (a_llx - llx), yscale * (a_lly - lly) - local w, h = xscale * (a_urx - a_llx), yscale * (a_ury - a_lly) - if subtype == "Link" then + local a_llx = rectangle[1] + local a_lly = rectangle[2] + local a_urx = rectangle[3] + local a_ury = rectangle[4] + local x = xscale * (a_llx - llx) + local y = yscale * (a_lly - lly) + local w = xscale * (a_urx - a_llx) + local h = yscale * (a_ury - a_lly) + if subtype == "Link" then local a = annotation.A if a then local linktype = a.S @@ -168,7 +187,7 @@ function codeinjections.mergereferences(specification) report_link("unsupported link annotation %a",linktype) end else - report_link("mising link annotation") + report_link("missing link annotation") end elseif trace_links then report_link("unsupported annotation %a",subtype) @@ -178,21 +197,21 @@ function codeinjections.mergereferences(specification) end end context.flushlayer { "epdflinks" } - -- context("\\gdef\\figurereference{%s}",reference) -- global - context.setgvalue("figurereference",reference) -- global - if trace_links then - report_link("setting figure reference to %a",reference) 
- end - specification.reference = reference - return namespace end + -- moved outside previous test + context.setgvalue("figurereference",reference) -- global + if trace_links then + report_link("setting figure reference to %a",reference) + end + specification.reference = reference + return namespace end end return ""-- no namespace, empty, not nil end function codeinjections.mergeviewerlayers(specification) - -- todo: parse included page for layers + -- todo: parse included page for layers .. or only for whole document inclusion if true then return end @@ -204,7 +223,7 @@ function codeinjections.mergeviewerlayers(specification) local fullname = specification.fullname local document = lpdf.epdf.load(fullname) if document then - local namespace = format("lpdf:epa:%s:",file.removesuffix(file.basename(fullname))) + local namespace = makenamespace(fullname) local layers = document.layers if layers then for i=1,layers.n do @@ -232,3 +251,160 @@ function codeinjections.mergeviewerlayers(specification) end end +-- new: for taco + +-- Beware, bookmarks can be in pdfdoc encoding or in unicode. However, in mkiv we +-- write out the strings in unicode (hex). When we read them in, we check for a bom +-- and convert to utf. + +function codeinjections.getbookmarks(filename) + + -- The first version built a nested tree and flattened that afterwards ... but I decided + -- to keep it simple and flat. 
+ + local list = bookmarks.extras.get(filename) + + if list then + return list + else + list = { } + end + + local document = nil + + if lfs.isfile(filename) then + document = lpdf.epdf.load(filename) + else + report_outline("unknown file %a",filename) + bookmarks.extras.register(filename,list) + return list + end + + local outlines = document.Catalog.Outlines + local pages = document.pages + local nofpages = pages.n -- we need to access once in order to initialize + local destinations = document.destinations + + -- I need to check this destination analyzer with the one in annotations .. best share + -- code (and not it's inconsistent). On the todo list ... + + local function setdestination(current,entry) + local destination = nil + local action = current.A + if action then + local subtype = action.S + if subtype == "GoTo" then + destination = action.D + if type(destination) == "string" then + entry.destination = destination + destination = destinations[destination] + local pagedata = destination and destination[1] + if pagedata then + entry.realpage = pagedata.number + end + else + -- maybe + end + else + -- maybe + end + else + local destination = current.Dest + if destination then + if type(destination) == "string" then + local wanted = destinations[destination] + destination = wanted and wanted.D + if destination then + entry.destination = destination + end + else + local pagedata = destination and destination[1] + if pagedata and pagedata.Type == "Page" then + entry.realpage = pagedata.number + end + end + end + end + end + + local function traverse(current,depth) + while current do + -- local title = current.Title + local title = current("Title") -- can be pdfdoc or unicode + if title then + local entry = { + level = depth, + title = title, + } + list[#list+1] = entry + setdestination(current,entry) + if trace_outlines then + report_outline("%w%s",2*depth,title) + end + end + local first = current.First + if first then + local current = first + while current 
do + local title = current.Title + if title and trace_outlines then + report_outline("%w%s",2*depth,title) + end + local entry = { + level = depth, + title = title, + } + setdestination(current,entry) + list[#list+1] = entry + traverse(current.First,depth+1) + current = current.Next + end + end + current = current.Next + end + end + + if outlines then + if trace_outlines then + report_outline("outline of %a:",document.filename) + report_outline() + end + traverse(outlines,0) + if trace_outlines then + report_outline() + end + elseif trace_outlines then + report_outline("no outline in %a",document.filename) + end + + bookmarks.extras.register(filename,list) + + return list + +end + +function codeinjections.mergebookmarks(specification) + -- codeinjections.getbookmarks(document) + if not specification then + specification = figures and figures.current() + specification = specification and specification.status + end + if specification then + local fullname = specification.fullname + local bookmarks = backends.codeinjections.getbookmarks(fullname) + local realpage = tonumber(specification.page) or 1 + for i=1,#bookmarks do + local b = bookmarks[i] + if not b.usedpage then + if b.realpage == realpage then + if trace_options then + report_outline("using %a at page %a of file %a",b.title,realpage,fullname) + end + b.usedpage = true + b.section = structures.sections.currentsectionindex() + b.pageindex = specification.pageindex + end + end + end + end +end diff --git a/tex/context/base/lpdf-epd.lua b/tex/context/base/lpdf-epd.lua index c37336d07..44e1a04c2 100644 --- a/tex/context/base/lpdf-epd.lua +++ b/tex/context/base/lpdf-epd.lua @@ -6,32 +6,126 @@ if not modules then modules = { } end modules ['lpdf-epd'] = { license = "see context related readme files" } --- This is an experimental layer around the epdf library. 
The reason for --- this layer is that I want to be independent of the library (which --- implements a selection of what a file provides) and also because I --- want an interface closer to Lua's table model while the API stays --- close to the original xpdf library. Of course, after prototyping a --- solution, we can optimize it using the low level epdf accessors. - --- It will be handy when we have a __length and __next that can trigger --- the resolve till then we will provide .n as #. +-- This is an experimental layer around the epdf library. The reason for this layer is that +-- I want to be independent of the library (which implements a selection of what a file +-- provides) and also because I want an interface closer to Lua's table model while the API +-- stays close to the original xpdf library. Of course, after prototyping a solution, we can +-- optimize it using the low level epdf accessors. However, not all are accessible (this will +-- be fixed). +-- +-- It will be handy when we have a __length and __next that can trigger the resolve till then +-- we will provide .n as #; maybe in Lua 5.3 or later. +-- +-- As there can be references to the parent we cannot expand a tree. I played with some +-- expansion variants but it does not pay off; adding extra checks is nto worth the trouble. +-- +-- The document stays over. In order to free memory one has to explicitly onload the loaded +-- document. +-- +-- We have much more checking then needed in the prepare functions because occasionally +-- we run into bugs in poppler or the epdf interface. It took us a while to realize that +-- there was a long standing gc issue the on long runs with including many pages could +-- crash the analyzer. +-- +-- Normally a value is fetched by key, as in foo.Title but as it can be in pdfdoc encoding +-- a safer bet is foo("Title") which will return a decoded string (or the original if it +-- already was unicode). 
+ +local setmetatable, rawset, rawget, type = setmetatable, rawset, rawget, type +local tostring, tonumber = tostring, tonumber +local lower, match, char, byte, find = string.lower, string.match, string.char, string.byte, string.find +local abs = math.abs +local concat = table.concat +local toutf, toeight, utfchar = string.toutf, utf.toeight, utf.char + +local lpegmatch, lpegpatterns = lpeg.match, lpeg.patterns +local P, C, S, R, Ct, Cc, V, Carg, Cs = lpeg.P, lpeg.C, lpeg.S, lpeg.R, lpeg.Ct, lpeg.Cc, lpeg.V, lpeg.Carg, lpeg.Cs + +local epdf = epdf + lpdf = lpdf or { } +local lpdf = lpdf +lpdf.epdf = { } + +local report_epdf = logs.reporter("epdf") + +local getDict, getArray, getReal, getNum, getString, getBool, getName, getRef, getRefNum +local getType, getTypeName +local dictGetLength, dictGetVal, dictGetValNF, dictGetKey +local arrayGetLength, arrayGetNF, arrayGet +local streamReset, streamGetDict, streamGetChar + +do + local object = epdf.Object() + -- + getDict = object.getDict + getArray = object.getArray + getReal = object.getReal + getNum = object.getNum + getString = object.getString + getBool = object.getBool + getName = object.getName + getRef = object.getRef + getRefNum = object.getRefNum + -- + getType = object.getType + getTypeName = object.getTypeName + -- + streamReset = object.streamReset + streamGetDict = object.streamGetDict + streamGetChar = object.streamGetChar + -- +end --- As there can be references to the parent we cannot expand a tree. I --- played with some expansion variants but it does to pay off. 
+local function initialize_methods(xref) + local dictionary = epdf.Dict(xref) + local array = epdf.Array(xref) + -- + dictGetLength = dictionary.getLength + dictGetVal = dictionary.getVal + dictGetValNF = dictionary.getValNF + dictGetKey = dictionary.getKey + -- + arrayGetLength = array.getLength + arrayGetNF = array.getNF + arrayGet = array.get + -- + -- report_epdf("initializing lpdf.epdf library") + -- + initialize_methods = function() + -- already done + end +end --- Maybe we need a close(). --- We cannot access all destinations in one run. +local typenames = { [0] = + "boolean", + "integer", + "real", + "string", + "name", + "null", + "array", + "dictionary", + "stream", + "ref", + "cmd", + "error", + "eof", + "none", + "integer64", +} -local setmetatable, rawset, rawget, tostring, tonumber = setmetatable, rawset, rawget, tostring, tonumber -local lower, match, char, find, sub = string.lower, string.match, string.char, string.find, string.sub -local concat = table.concat -local toutf = string.toutf +local typenumbers = table.swapped(typenames) -local report_epdf = logs.reporter("epdf") +local null_code = typenumbers.null +local ref_code = typenumbers.ref --- a bit of protection +local function fatal_error(...) + report_epdf(...) + report_epdf("aborting job in order to avoid crash") + os.exit() +end -local limited = false +local limited = false -- a bit of protection directives.register("system.inputmode", function(v) if not limited then @@ -43,87 +137,167 @@ directives.register("system.inputmode", function(v) end end) --- +-- epdf is the built-in library function epdf.type(o) local t = lower(match(tostring(o),"[^ :]+")) return t or "?" end -lpdf = lpdf or { } -local lpdf = lpdf +local checked_access -lpdf.epdf = { } +-- dictionaries (can be optimized: ... 
resolve and redefine when all locals set) -local checked_access +local frompdfdoc = lpdf.frompdfdoc + +local function get_flagged(t,f,k) + local fk = f[k] + if not fk then + return t[k] + elseif fk == "rawtext" then + return frompdfdoc(t[k]) + else -- no other flags yet + return t[k] + end +end -local function prepare(document,d,t,n,k,mt) +local function prepare(document,d,t,n,k,mt,flags) for i=1,n do - local v = d:getVal(i) - local r = d:getValNF(i) - if r:getTypeName() == "ref" then - r = r:getRef().num - local c = document.cache[r] - if c then - -- + local v = dictGetVal(d,i) + if v then + local r = dictGetValNF(d,i) + local kind = getType(v) + if kind == null_code then + -- ignore else - c = checked_access[v:getTypeName()](v,document,r) - if c then - document.cache[r] = c - document.xrefs[c] = r + local key = dictGetKey(d,i) + if kind then + if r and getType(r) == ref_code then + local objnum = getRefNum(r) + local cached = document.__cache__[objnum] + if not cached then + cached = checked_access[kind](v,document,objnum,mt) + if c then + document.__cache__[objnum] = cached + document.__xrefs__[cached] = objnum + end + end + t[key] = cached + else + local v, flag = checked_access[kind](v,document) + t[key] = v + if flag then + flags[key] = flag -- flags + end + end + else + report_epdf("warning: nil value for key %a in dictionary",key) end end - t[d:getKey(i)] = c else - t[d:getKey(i)] = checked_access[v:getTypeName()](v,document) + fatal_error("error: invalid value at index %a in dictionary of %a",i,document.filename) end end - getmetatable(t).__index = nil -- ?? 
weird -setmetatable(t,mt) + if mt then + setmetatable(t,mt) + else + getmetatable(t).__index = nil + end return t[k] end -local function some_dictionary(d,document,r,mt) - local n = d and d:getLength() or 0 +local function some_dictionary(d,document) + local n = d and dictGetLength(d) or 0 + if n > 0 then + local t = { } + local f = { } + setmetatable(t, { + __index = function(t,k) + return prepare(document,d,t,n,k,_,_,f) + end, + __call = function(t,k) + return get_flagged(t,f,k) + end, + } ) + return t + end +end + +local function get_dictionary(object,document,r,mt) + local d = getDict(object) + local n = d and dictGetLength(d) or 0 if n > 0 then local t = { } - setmetatable(t, { __index = function(t,k) return prepare(document,d,t,n,k,mt) end } ) + local f = { } + setmetatable(t, { + __index = function(t,k) + return prepare(document,d,t,n,k,mt,f) + end, + __call = function(t,k) + return get_flagged(t,f,k) + end, + } ) return t end end -local done = { } +-- arrays (can be optimized: ... 
resolve and redefine when all locals set) local function prepare(document,a,t,n,k) for i=1,n do - local v = a:get(i) - local r = a:getNF(i) - if v:getTypeName() == "null" then - -- TH: weird, but appears possible - elseif r:getTypeName() == "ref" then - r = r:getRef().num - local c = document.cache[r] - if c then - -- + local v = arrayGet(a,i) + if v then + local kind = getType(v) + if kind == null_code then + -- ignore + elseif kind then + local r = arrayGetNF(a,i) + if r and getType(r) == ref_code then + local objnum = getRefNum(r) + local cached = document.__cache__[objnum] + if not cached then + cached = checked_access[kind](v,document,objnum) + document.__cache__[objnum] = cached + document.__xrefs__[cached] = objnum + end + t[i] = cached + else + t[i] = checked_access[kind](v,document) + end else - c = checked_access[v:getTypeName()](v,document,r) - document.cache[r] = c - document.xrefs[c] = r + report_epdf("warning: nil value for index %a in array",i) end - t[i] = c else - t[i] = checked_access[v:getTypeName()](v,document) + fatal_error("error: invalid value at index %a in array of %a",i,document.filename) end end getmetatable(t).__index = nil return t[k] end -local function some_array(a,document,r) - local n = a and a:getLength() or 0 +local function some_array(a,document) + local n = a and arrayGetLength(a) or 0 + if n > 0 then + local t = { n = n } + setmetatable(t, { + __index = function(t,k) + return prepare(document,a,t,n,k) + end + } ) + return t + end +end + +local function get_array(object,document) + local a = getArray(object) + local n = a and arrayGetLength(a) or 0 if n > 0 then local t = { n = n } - setmetatable(t, { __index = function(t,k) return prepare(document,a,t,n,k) end } ) + setmetatable(t, { + __index = function(t,k) + return prepare(document,a,t,n,k) + end + } ) return t end end @@ -131,9 +305,9 @@ end local function streamaccess(s,_,what) if not what or what == "all" or what == "*all" then local t, n = { }, 0 - s:streamReset() + 
streamReset(s) while true do - local c = s:streamGetChar() + local c = streamGetChar(s) if c < 0 then break else @@ -145,56 +319,95 @@ local function streamaccess(s,_,what) end end -local function some_stream(d,document,r) +local function get_stream(d,document) if d then - d:streamReset() - local s = some_dictionary(d:streamGetDict(),document,r) + streamReset(d) + local s = some_dictionary(streamGetDict(d),document) getmetatable(s).__call = function(...) return streamaccess(d,...) end return s end end --- we need epdf.boolean(v) in addition to v:getBool() [dictionary, array, stream, real, integer, string, boolean, name, ref, null] - -checked_access = { - dictionary = function(d,document,r) - return some_dictionary(d:getDict(),document,r) - end, - array = function(a,document,r) - return some_array(a:getArray(),document,r) - end, - stream = function(v,document,r) - return some_stream(v,document,r) - end, - real = function(v) - return v:getReal() - end, - integer = function(v) - return v:getNum() - end, - string = function(v) - return toutf(v:getString()) - end, - boolean = function(v) - return v:getBool() - end, - name = function(v) - return v:getName() - end, - ref = function(v) - return v:getRef() - end, - null = function() - return nil - end, -} +-- We need to convert the string from utf16 although there is no way to +-- check if we have a regular string starting with a bom. So, we have +-- na dilemma here: a pdf doc encoded string can be invalid utf. --- checked_access.real = epdf.real --- checked_access.integer = epdf.integer --- checked_access.string = epdf.string --- checked_access.boolean = epdf.boolean --- checked_access.name = epdf.name --- checked_access.ref = epdf.ref +-- : implicit 0 appended if odd +-- (byte encoded) : \( \) \\ escaped +-- +-- : utf16be +-- +-- \r \r \t \b \f \( \) \\ \NNN and \ : append next line +-- +-- the getString function gives back bytes so we don't need to worry about +-- the hex aspect. 
+ +local u_pattern = lpeg.patterns.utfbom_16_be * lpeg.patterns.utf16_to_utf8_be +local b_pattern = lpeg.patterns.hextobytes + +local function get_string(v) + -- the toutf function only converts a utf16 string and leves the original + -- untouched otherwise; one might want to apply lpdf.frompdfdoc to a + -- non-unicode string + local s = getString(v) + if not s or s == "" then + return "" + end + local u = lpegmatch(u_pattern,s) + if u then + return u -- , "unicode" + end + local b = lpegmatch(b_pattern,s) + if b then + return b, "rawtext" + end + return s, "rawtext" +end + +local function get_null() + return nil +end + +-- we have dual access: by typenumber and by typename + +local function invalidaccess(k,document) + local fullname = type(document) == "table" and document.fullname + if fullname then + fatal_error("error, asking for key %a in checker of %a",k,fullname) + else + fatal_error("error, asking for key %a in checker",k) + end +end + +checked_access = table.setmetatableindex(function(t,k) + return function(v,document) + invalidaccess(k,document) + end +end) + +checked_access[typenumbers.boolean] = getBool +checked_access[typenumbers.integer] = getNum +checked_access[typenumbers.real] = getReal +checked_access[typenumbers.string] = get_string -- getString +checked_access[typenumbers.name] = getName +checked_access[typenumbers.null] = get_null +checked_access[typenumbers.array] = get_array -- d,document,r +checked_access[typenumbers.dictionary] = get_dictionary -- d,document,r +checked_access[typenumbers.stream] = get_stream +checked_access[typenumbers.ref] = getRef + +for i=0,#typenames do + local checker = checked_access[i] + if not checker then + checker = function() + return function(v,document) + invalidaccess(i,document) + end + end + checked_access[i] = checker + end + checked_access[typenames[i]] = checker +end local function getnames(document,n,target) -- direct if n then @@ -252,7 +465,6 @@ local function getlayers(document) local n = layers.n for 
i=1,n do local layer = layers[i] - -- print(document.xrefs[layer]) t[i] = layer.Name end t.n = n @@ -267,50 +479,33 @@ local function getstructure(document) end local function getpages(document,Catalog) - local data = document.data - local xrefs = document.xrefs - local cache = document.cache - local cata = data:getCatalog() - local xref = data:getXRef() - local pages = { } - local nofpages = cata:getNumPages() --- local function getpagestuff(pagenumber,k) --- if k == "MediaBox" then --- local pageobj = cata:getPage(pagenumber) --- local pagebox = pageobj:getMediaBox() --- return { pagebox.x1, pagebox.y1, pagebox.x2, pagebox.y2 } --- elseif k == "CropBox" then --- local pageobj = cata:getPage(pagenumber) --- local pagebox = pageobj:getMediaBox() --- return { pagebox.x1, pagebox.y1, pagebox.x2, pagebox.y2 } --- elseif k == "Resources" then --- print("todo page resources from parent") --- -- local pageobj = cata:getPage(pagenumber) --- -- local resources = pageobj:getResources() --- end --- end --- for pagenumber=1,nofpages do --- local mt = { __index = function(t,k) --- local v = getpagestuff(pagenumber,k) --- if v then --- t[k] = v --- end --- return v --- end } - local mt = { __index = Catalog.Pages } + local __data__ = document.__data__ + local __xrefs__ = document.__xrefs__ + local __cache__ = document.__cache__ + local __xref__ = document.__xref__ + -- + local catalog = __data__:getCatalog() + local pages = { } + local nofpages = catalog:getNumPages() + local metatable = { __index = Catalog.Pages } + -- for pagenumber=1,nofpages do - local pagereference = cata:getPageRef(pagenumber).num - local pagedata = some_dictionary(xref:fetch(pagereference,0):getDict(),document,pagereference,mt) + local pagereference = catalog:getPageRef(pagenumber).num + local pageobject = __xref__:fetch(pagereference,0) + local pagedata = get_dictionary(pageobject,document,pagereference,metatable) if pagedata then - pagedata.number = pagenumber - pages[pagenumber] = pagedata - 
xrefs[pagedata] = pagereference - cache[pagereference] = pagedata + -- rawset(pagedata,"number",pagenumber) + pagedata.number = pagenumber + pages[pagenumber] = pagedata + __xrefs__[pagedata] = pagereference + __cache__[pagereference] = pagedata else report_epdf("missing pagedata at slot %i",i) end end + -- pages.n = nofpages + -- return pages end @@ -337,19 +532,25 @@ function lpdf.epdf.load(filename) local document = loaded[filename] if not document then statistics.starttiming(lpdf.epdf) - local data = epdf.open(filename) -- maybe resolvers.find_file - if data then + local __data__ = epdf.open(filename) -- maybe resolvers.find_file + if __data__ then + local __xref__ = __data__:getXRef() document = { - filename = filename, - cache = { }, - xrefs = { }, - data = data, + filename = filename, + __cache__ = { }, + __xrefs__ = { }, + __fonts__ = { }, + __data__ = __data__, + __xref__ = __xref__, } - local Catalog = some_dictionary(data:getXRef():getCatalog():getDict(),document) - local Info = some_dictionary(data:getXRef():getDocInfo():getDict(),document) - document.Catalog = Catalog - document.Info = Info - -- document.catalog = Catalog + -- + initialize_methods(__xref__) + -- + local Catalog = some_dictionary(__xref__:getCatalog():getDict(),document) + local Info = some_dictionary(__xref__:getDocInfo():getDict(),document) + -- + document.Catalog = Catalog + document.Info = Info -- a few handy helper tables document.pages = delayed(document,"pages", function() return getpages(document,Catalog) end) document.destinations = delayed(document,"destinations", function() return getnames(document,Catalog.Names and Catalog.Names.Dests) end) @@ -362,24 +563,257 @@ function lpdf.epdf.load(filename) document = false end loaded[filename] = document + loaded[document] = document statistics.stoptiming(lpdf.epdf) -- print(statistics.elapsedtime(lpdf.epdf)) end - return document + return document or nil +end + +function lpdf.epdf.unload(filename) + local document = loaded[filename] 
+ if document then + loaded[document] = nil + loaded[filename] = nil + end end -- for k, v in next, expand(t) do -function lpdf.epdf.expand(t) +local function expand(t) if type(t) == "table" then local dummy = t.dummy end return t end +-- for k, v in expanded(t) do + +local function expanded(t) + if type(t) == "table" then + local dummy = t.dummy + end + return next, t +end + +lpdf.epdf.expand = expand +lpdf.epdf.expanded = expanded + +-- we could resolve the text stream in one pass if we directly handle the +-- font but why should we complicate things + +local hexdigit = R("09","AF") +local numchar = ( P("\\") * ( (R("09")^3/tonumber) + C(1) ) ) + C(1) +local number = lpegpatterns.number / tonumber +local spaces = lpegpatterns.whitespace^1 +local keyword = P("/") * C(R("AZ","az","09")^1) +local operator = C((R("AZ","az")+P("'")+P('"'))^1) + +local grammar = P { "start", + start = (keyword + number + V("dictionary") + V("unicode") + V("string") + V("unicode")+ V("array") + spaces)^1, + array = P("[") * Ct(V("start")^1) * P("]"), + dictionary = P("<<") * Ct(V("start")^1) * P(">>"), + unicode = P("<") * Ct(Cc("hex") * C((1-P(">"))^1)) * P(">"), + string = P("(") * Ct(Cc("dec") * C((V("string")+numchar)^1)) * P(")"), -- untested +} + +local operation = Ct(grammar^1 * operator) +local parser = Ct((operation + P(1))^1) + +-- beginbfrange : +-- [ ] +-- beginbfchar : + +local fromsixteen = lpdf.fromsixteen -- maybe inline the lpeg ... 
but not worth it + +local function f_bfchar(t,a,b) + t[tonumber(a,16)] = fromsixteen(b) +end + +local function f_bfrange_1(t,a,b,c) + print("todo 1",a,b,c) + -- c is string + -- todo t[tonumber(a,16)] = fromsixteen(b) +end + +local function f_bfrange_2(t,a,b,c) + print("todo 2",a,b,c) + -- c is table + -- todo t[tonumber(a,16)] = fromsixteen(b) +end + +local optionals = spaces^0 +local hexstring = optionals * P("<") * C((1-P(">"))^1) * P(">") +local bfchar = Carg(1) * hexstring * hexstring / f_bfchar +local bfrange = Carg(1) * hexstring * hexstring * hexstring / f_bfrange_1 + + Carg(1) * hexstring * hexstring * optionals * P("[") * Ct(hexstring^1) * optionals * P("]") / f_bfrange_2 +local fromunicode = ( + P("beginbfchar" ) * bfchar ^1 * optionals * P("endbfchar" ) + + P("beginbfrange") * bfrange^1 * optionals * P("endbfrange") + + spaces + + P(1) +)^1 * Carg(1) + +local function analyzefonts(document,resources) -- unfinished + local fonts = document.__fonts__ + if resources then + local fontlist = resources.Font + if fontlist then + for id, data in expanded(fontlist) do + if not fonts[id] then + -- a quck hack ... I will look into it more detail if I find a real + -- -application for it + local tounicode = data.ToUnicode() + if tounicode then + tounicode = lpegmatch(fromunicode,tounicode,1,{}) + end + fonts[id] = { + tounicode = type(tounicode) == "table" and tounicode or { } + } + table.setmetatableindex(fonts[id],"self") + end + end + end + end + return fonts +end + +local more = 0 +local unic = nil -- cheaper than passing each time as Carg(1) + +local p_hex_to_utf = C(4) / function(s) -- needs checking ! 
+ local now = tonumber(s,16) + if more > 0 then + now = (more-0xD800)*0x400 + (now-0xDC00) + 0x10000 -- the 0x10000 smells wrong + more = 0 + return unic[now] or utfchar(now) + elseif now >= 0xD800 and now <= 0xDBFF then + more = now + -- return "" + else + return unic[now] or utfchar(now) + end +end + +local p_dec_to_utf = C(1) / function(s) -- needs checking ! + local now = byte(s) + return unic[now] or utfchar(now) +end + +local p_hex_to_utf = P(true) / function() more = 0 end * Cs(p_hex_to_utf^1) +local p_dec_to_utf = P(true) / function() more = 0 end * Cs(p_dec_to_utf^1) + +function lpdf.epdf.getpagecontent(document,pagenumber) + + local page = document.pages[pagenumber] + + if not page then + return + end + + local fonts = analyzefonts(document,page.Resources) + + local content = page.Contents() or "" + local list = lpegmatch(parser,content) + local font = nil + -- local unic = nil + + for i=1,#list do + local entry = list[i] + local size = #entry + local operator = entry[size] + if operator == "Tf" then + font = fonts[entry[1]] + unic = font.tounicode + elseif operator == "TJ" then -- { array, TJ } + local list = entry[1] + for i=1,#list do + local li = list[i] + if type(li) == "table" then + if li[1] == "hex" then + list[i] = lpegmatch(p_hex_to_utf,li[2]) + else + list[i] = lpegmatch(p_dec_to_utf,li[2]) + end + else + -- kern + end + end + elseif operator == "Tj" or operator == "'" or operator == '"' then -- { string, Tj } { string, ' } { n, m, string, " } + local list = entry[size-1] + if list[1] == "hex" then + list[2] = lpegmatch(p_hex_to_utf,li[2],1,unic) + else + list[2] = lpegmatch(p_dec_to_utf,li[2],1,unic) + end + end + end + + unic = nil -- can be collected + + return list + +end + +-- This is also an experiment. When I really neet it I can improve it, fo rinstance +-- with proper position calculating. It might be usefull for some search or so. + +local softhyphen = utfchar(0xAD) .. 
"$" +local linefactor = 1.3 + +function lpdf.epdf.contenttotext(document,list) -- maybe signal fonts + local last_y = 0 + local last_f = 0 + local text = { } + local last = 0 + + for i=1,#list do + local entry = list[i] + local size = #entry + local operator = entry[size] + if operator == "Tf" then + last_f = entry[2] + elseif operator == "TJ" then + local list = entry[1] + for i=1,#list do + local li = list[i] + if type(li) == "string" then + last = last + 1 + text[last] = li + elseif li < -50 then + last = last + 1 + text[last] = " " + end + end + line = concat(list) + elseif operator == "Tj" then + last = last + 1 + text[last] = entry[size-1] + elseif operator == "cm" or operator == "Tm" then + local ty = entry[6] + local dy = abs(last_y - ty) + if dy > linefactor*last_f then + if last > 0 then + if find(text[last],softhyphen) then + -- ignore + else + last = last + 1 + text[last] = "\n" + end + end + end + last_y = ty + end + end + + return concat(text) +end + +-- document.Catalog.StructTreeRoot.ParentTree.Nums[2][1].A.P[1]) + -- helpers -- function lpdf.epdf.getdestinationpage(document,name) --- local destination = document.data:findDest(name) +-- local destination = document.__data__:findDest(name) -- return destination and destination.number -- end diff --git a/tex/context/base/lpdf-fld.lua b/tex/context/base/lpdf-fld.lua index 414562ad5..4f15b3c7b 100644 --- a/tex/context/base/lpdf-fld.lua +++ b/tex/context/base/lpdf-fld.lua @@ -280,10 +280,8 @@ end local pdfdocencodingvector, pdfdocencodingcapsule --- The pdf doc encoding vector is needed in order to --- trigger propper unicode. Interesting is that when --- a glyph is not in the vector, it is still visible --- as it is taken from some other font. Messy. +-- The pdf doc encoding vector is needed in order to trigger propper unicode. Interesting is that when +-- a glyph is not in the vector, it is still visible as it is taken from some other font. Messy. -- To be checked: only when text/line fields. 
diff --git a/tex/context/base/lpdf-ini.lua b/tex/context/base/lpdf-ini.lua index b58008e7f..76fa5cbb2 100644 --- a/tex/context/base/lpdf-ini.lua +++ b/tex/context/base/lpdf-ini.lua @@ -6,9 +6,11 @@ if not modules then modules = { } end modules ['lpdf-ini'] = { license = "see context related readme files" } +-- beware of "too many locals" here + local setmetatable, getmetatable, type, next, tostring, tonumber, rawset = setmetatable, getmetatable, type, next, tostring, tonumber, rawset local char, byte, format, gsub, concat, match, sub, gmatch = string.char, string.byte, string.format, string.gsub, table.concat, string.match, string.sub, string.gmatch -local utfchar, utfvalues = utf.char, utf.values +local utfchar, utfbyte, utfvalues = utf.char, utf.byte, utf.values local sind, cosd, floor, max, min = math.sind, math.cosd, math.floor, math.max, math.min local lpegmatch, P, C, R, S, Cc, Cs = lpeg.match, lpeg.P, lpeg.C, lpeg.R, lpeg.S, lpeg.Cc, lpeg.Cs local formatters = string.formatters @@ -18,6 +20,10 @@ local report_objects = logs.reporter("backend","objects") local report_finalizing = logs.reporter("backend","finalizing") local report_blocked = logs.reporter("backend","blocked") +-- In ConTeXt MkIV we use utf8 exclusively so all strings get mapped onto a hex +-- encoded utf16 string type between <>. We could probably save some bytes by using +-- strings between () but then we end up with escaped ()\ too. 
+ -- gethpos : used -- getpos : used -- getvpos : used @@ -193,67 +199,112 @@ function lpdf.rectangle(width,height,depth) end end --- +-- we could use a hash of predefined unicodes + +-- local function tosixteen(str) -- an lpeg might be faster (no table) +-- if not str or str == "" then +-- return "" -- not () as we want an indication that it's unicode +-- else +-- local r, n = { "")) local function tosixteen(str) -- an lpeg might be faster (no table) if not str or str == "" then return "" -- not () as we want an indication that it's unicode else - local r, n = { " 0 then + now = (more-0xD800)*0x400 + (now-0xDC00) + 0x10000 -- the 0x10000 smells wrong + more = 0 + return utfchar(now) + elseif now >= 0xD800 and now <= 0xDBFF then + more = now + -- return "" + else + return utfchar(now) + end +end --- lpeg is some 5 times faster than gsub (in test) on escaping +local pattern = P(true) / function() more = 0 end * Cs(pattern^0) --- local escapes = { --- ["\\"] = "\\\\", --- ["/"] = "\\/", ["#"] = "\\#", --- ["<"] = "\\<", [">"] = "\\>", --- ["["] = "\\[", ["]"] = "\\]", --- ["("] = "\\(", [")"] = "\\)", --- } --- --- local escaped = Cs(Cc("(") * (S("\\/#<>[]()")/escapes + P(1))^0 * Cc(")")) --- --- local function toeight(str) --- if not str or str == "" then --- return "()" --- else --- return lpegmatch(escaped,str) --- end --- end --- --- -- no need for escaping .. just use unicode instead +local function fromsixteen(str) + if not str or str == "" then + return "" + else + return lpegmatch(pattern,str) + end +end --- \0 \t \n \r \f ( ) [ ] { } / % +local toregime = regimes.toregime +local fromregime = regimes.fromregime -local function toeight(str) - return "(" .. str .. 
")" +local function topdfdoc(str,default) + if not str or str == "" then + return "" + else + return lpegmatch(escaped,toregime("pdfdoc",str,default)) -- could be combined if needed + end end -lpdf.toeight = toeight +local function frompdfdoc(str) + if not str or str == "" then + return "" + else + return fromregime("pdfdoc",str) + end +end --- local escaped = lpeg.Cs((lpeg.S("\0\t\n\r\f ()[]{}/%")/function(s) return format("#%02X",byte(s)) end + lpeg.P(1))^0) --- --- local function cleaned(str) --- return (str and str ~= "" and lpegmatch(escaped,str)) or "" --- end --- --- lpdf.cleaned = cleaned -- not public yet +if not toregime then topdfdoc = function(s) return s end end +if not fromregime then frompdfdoc = function(s) return s end end + +local function toeight(str) + if not str or str == "" then + return "()" + else + return lpegmatch(escaped,str) + end +end + +lpdf.tosixteen = tosixteen +lpdf.toeight = toeight +lpdf.topdfdoc = topdfdoc +lpdf.fromsixteen = fromsixteen +lpdf.frompdfdoc = frompdfdoc local function merge_t(a,b) local t = { } @@ -288,8 +339,8 @@ tostring_d = function(t,contentonly,key) r[rn] = f_key_value(k,toeight(v)) elseif tv == "number" then r[rn] = f_key_number(k,v) - elseif tv == "unicode" then - r[rn] = f_key_value(k,tosixteen(v)) + -- elseif tv == "unicode" then -- can't happen + -- r[rn] = f_key_value(k,tosixteen(v)) elseif tv == "table" then local mv = getmetatable(v) if mv and mv.__lpdftype then @@ -328,8 +379,8 @@ tostring_a = function(t,contentonly,key) r[k] = toeight(v) elseif tv == "number" then r[k] = f_tonumber(v) - elseif tv == "unicode" then - r[k] = tosixteen(v) + -- elseif tv == "unicode" then + -- r[k] = tosixteen(v) elseif tv == "table" then local mv = getmetatable(v) local mt = mv and mv.__lpdftype @@ -358,15 +409,16 @@ tostring_a = function(t,contentonly,key) end end -local tostring_x = function(t) return concat(t," ") end -local tostring_s = function(t) return toeight(t[1]) end -local tostring_u = function(t) return 
tosixteen(t[1]) end -local tostring_n = function(t) return tostring(t[1]) end -- tostring not needed -local tostring_n = function(t) return f_tonumber(t[1]) end -- tostring not needed -local tostring_c = function(t) return t[1] end -- already prefixed (hashed) -local tostring_z = function() return "null" end -local tostring_t = function() return "true" end -local tostring_f = function() return "false" end +local tostring_x = function(t) return concat(t," ") end +local tostring_s = function(t) return toeight(t[1]) end +local tostring_p = function(t) return topdfdoc(t[1],t[2]) end +local tostring_u = function(t) return tosixteen(t[1]) end +local tostring_n = function(t) return tostring(t[1]) end -- tostring not needed +local tostring_n = function(t) return f_tonumber(t[1]) end -- tostring not needed +local tostring_c = function(t) return t[1] end -- already prefixed (hashed) +local tostring_z = function() return "null" end +local tostring_t = function() return "true" end +local tostring_f = function() return "false" end local tostring_r = function(t) local n = t[1] return n and n > 0 and (n .. 
" 0 R") or "NULL" end local tostring_v = function(t) @@ -378,18 +430,19 @@ local tostring_v = function(t) end end -local function value_x(t) return t end -- the call is experimental -local function value_s(t,key) return t[1] end -- the call is experimental -local function value_u(t,key) return t[1] end -- the call is experimental -local function value_n(t,key) return t[1] end -- the call is experimental -local function value_c(t) return sub(t[1],2) end -- the call is experimental -local function value_d(t) return tostring_d(t,true) end -- the call is experimental -local function value_a(t) return tostring_a(t,true) end -- the call is experimental -local function value_z() return nil end -- the call is experimental -local function value_t(t) return t.value or true end -- the call is experimental -local function value_f(t) return t.value or false end -- the call is experimental -local function value_r() return t[1] or 0 end -- the call is experimental -- NULL -local function value_v() return t[1] end -- the call is experimental +local function value_x(t) return t end +local function value_s(t) return t[1] end +local function value_p(t) return t[1] end +local function value_u(t) return t[1] end +local function value_n(t) return t[1] end +local function value_c(t) return sub(t[1],2) end +local function value_d(t) return tostring_d(t,true) end +local function value_a(t) return tostring_a(t,true) end +local function value_z() return nil end +local function value_t(t) return t.value or true end +local function value_f(t) return t.value or false end +local function value_r() return t[1] or 0 end -- NULL +local function value_v() return t[1] end local function add_x(t,k,v) rawset(t,k,tostring(v)) end @@ -398,6 +451,7 @@ local mt_d = { __lpdftype = "dictionary", __tostring = tostring_d, __call = valu local mt_a = { __lpdftype = "array", __tostring = tostring_a, __call = value_a } local mt_u = { __lpdftype = "unicode", __tostring = tostring_u, __call = value_u } local mt_s = 
{ __lpdftype = "string", __tostring = tostring_s, __call = value_s } +local mt_p = { __lpdftype = "docstring", __tostring = tostring_p, __call = value_p } local mt_n = { __lpdftype = "number", __tostring = tostring_n, __call = value_n } local mt_c = { __lpdftype = "constant", __tostring = tostring_c, __call = value_c } local mt_z = { __lpdftype = "null", __tostring = tostring_z, __call = value_z } @@ -431,8 +485,12 @@ local function pdfstring(str,default) return setmetatable({ str or default or "" },mt_s) end +local function pdfdocstring(str,default,defaultchar) + return setmetatable({ str or default or "", defaultchar or " " },mt_p) +end + local function pdfunicode(str,default) - return setmetatable({ str or default or "" },mt_u) + return setmetatable({ str or default or "" },mt_u) -- could be a string end local cache = { } -- can be weak @@ -516,6 +574,7 @@ end lpdf.stream = pdfstream -- THIS WILL PROBABLY CHANGE lpdf.dictionary = pdfdictionary lpdf.array = pdfarray +lpdf.docstring = pdfdocstring lpdf.string = pdfstring lpdf.unicode = pdfunicode lpdf.number = pdfnumber @@ -778,145 +837,147 @@ end callbacks.register("finish_pdffile", lpdf.finalizedocument) --- some minimal tracing, handy for checking the order -local function trace_set(what,key) - if trace_resources then - report_finalizing("setting key %a in %a",key,what) +do + + -- some minimal tracing, handy for checking the order + + local function trace_set(what,key) + if trace_resources then + report_finalizing("setting key %a in %a",key,what) + end end -end -local function trace_flush(what) - if trace_resources then - report_finalizing("flushing %a",what) + + local function trace_flush(what) + if trace_resources then + report_finalizing("flushing %a",what) + end end -end -lpdf.protectresources = true + lpdf.protectresources = true -local catalog = pdfdictionary { Type = pdfconstant("Catalog") } -- nicer, but when we assign we nil the Type -local info = pdfdictionary { Type = pdfconstant("Info") } -- nicer, 
but when we assign we nil the Type ------ names = pdfdictionary { Type = pdfconstant("Names") } -- nicer, but when we assign we nil the Type + local catalog = pdfdictionary { Type = pdfconstant("Catalog") } -- nicer, but when we assign we nil the Type + local info = pdfdictionary { Type = pdfconstant("Info") } -- nicer, but when we assign we nil the Type + ----- names = pdfdictionary { Type = pdfconstant("Names") } -- nicer, but when we assign we nil the Type -local function flushcatalog() - if not environment.initex then - trace_flush("catalog") - catalog.Type = nil - pdfsetcatalog(catalog()) + local function flushcatalog() + if not environment.initex then + trace_flush("catalog") + catalog.Type = nil + pdfsetcatalog(catalog()) + end end -end -local function flushinfo() - if not environment.initex then - trace_flush("info") - info.Type = nil - pdfsetinfo(info()) + local function flushinfo() + if not environment.initex then + trace_flush("info") + info.Type = nil + pdfsetinfo(info()) + end end -end - --- local function flushnames() --- if not environment.initex then --- trace_flush("names") --- names.Type = nil --- pdfsetnames(names()) --- end --- end -function lpdf.addtocatalog(k,v) - if not (lpdf.protectresources and catalog[k]) then - trace_set("catalog",k) - catalog[k] = v + -- local function flushnames() + -- if not environment.initex then + -- trace_flush("names") + -- names.Type = nil + -- pdfsetnames(names()) + -- end + -- end + + function lpdf.addtocatalog(k,v) + if not (lpdf.protectresources and catalog[k]) then + trace_set("catalog",k) + catalog[k] = v + end end -end -function lpdf.addtoinfo(k,v) - if not (lpdf.protectresources and info[k]) then - trace_set("info",k) - info[k] = v + function lpdf.addtoinfo(k,v) + if not (lpdf.protectresources and info[k]) then + trace_set("info",k) + info[k] = v + end end -end --- local function lpdf.addtonames(k,v) --- if not (lpdf.protectresources and names[k]) then --- trace_set("names",k) --- names[k] = v --- end --- 
end + -- local function lpdf.addtonames(k,v) + -- if not (lpdf.protectresources and names[k]) then + -- trace_set("names",k) + -- names[k] = v + -- end + -- end -local names = pdfdictionary { - -- Type = pdfconstant("Names") -} + local names = pdfdictionary { + -- Type = pdfconstant("Names") + } -local function flushnames() - if next(names) and not environment.initex then - names.Type = pdfconstant("Names") - trace_flush("names") - lpdf.addtocatalog("Names",pdfreference(pdfimmediateobject(tostring(names)))) + local function flushnames() + if next(names) and not environment.initex then + names.Type = pdfconstant("Names") + trace_flush("names") + lpdf.addtocatalog("Names",pdfreference(pdfimmediateobject(tostring(names)))) + end end -end -function lpdf.addtonames(k,v) - if not (lpdf.protectresources and names [k]) then - trace_set("names", k) - names [k] = v + function lpdf.addtonames(k,v) + if not (lpdf.protectresources and names[k]) then + trace_set("names", k) + names [k] = v + end end -end -local dummy = pdfreserveobject() -- else bug in hvmd due so some internal luatex conflict - --- Some day I will implement a proper minimalized resource management. 
- -local r_extgstates, d_extgstates = pdfreserveobject(), pdfdictionary() local p_extgstates = pdfreference(r_extgstates) -local r_colorspaces, d_colorspaces = pdfreserveobject(), pdfdictionary() local p_colorspaces = pdfreference(r_colorspaces) -local r_patterns, d_patterns = pdfreserveobject(), pdfdictionary() local p_patterns = pdfreference(r_patterns) -local r_shades, d_shades = pdfreserveobject(), pdfdictionary() local p_shades = pdfreference(r_shades) - -local function checkextgstates () if next(d_extgstates ) then addtopageresources("ExtGState", p_extgstates ) end end -local function checkcolorspaces() if next(d_colorspaces) then addtopageresources("ColorSpace",p_colorspaces) end end -local function checkpatterns () if next(d_patterns ) then addtopageresources("Pattern", p_patterns ) end end -local function checkshades () if next(d_shades ) then addtopageresources("Shading", p_shades ) end end - -local function flushextgstates () if next(d_extgstates ) then trace_flush("extgstates") pdfimmediateobject(r_extgstates, tostring(d_extgstates )) end end -local function flushcolorspaces() if next(d_colorspaces) then trace_flush("colorspaces") pdfimmediateobject(r_colorspaces,tostring(d_colorspaces)) end end -local function flushpatterns () if next(d_patterns ) then trace_flush("patterns") pdfimmediateobject(r_patterns, tostring(d_patterns )) end end -local function flushshades () if next(d_shades ) then trace_flush("shades") pdfimmediateobject(r_shades, tostring(d_shades )) end end - -function lpdf.collectedresources() - local ExtGState = next(d_extgstates ) and p_extgstates - local ColorSpace = next(d_colorspaces) and p_colorspaces - local Pattern = next(d_patterns ) and p_patterns - local Shading = next(d_shades ) and p_shades - if ExtGState or ColorSpace or Pattern or Shading then - local collected = pdfdictionary { - ExtGState = ExtGState, - ColorSpace = ColorSpace, - Pattern = Pattern, - Shading = Shading, - -- ProcSet = pdfarray { pdfconstant("PDF") }, - } - 
return collected() - else - return "" + local r_extgstates, d_extgstates = pdfreserveobject(), pdfdictionary() local p_extgstates = pdfreference(r_extgstates) + local r_colorspaces, d_colorspaces = pdfreserveobject(), pdfdictionary() local p_colorspaces = pdfreference(r_colorspaces) + local r_patterns, d_patterns = pdfreserveobject(), pdfdictionary() local p_patterns = pdfreference(r_patterns) + local r_shades, d_shades = pdfreserveobject(), pdfdictionary() local p_shades = pdfreference(r_shades) + + local function checkextgstates () if next(d_extgstates ) then addtopageresources("ExtGState", p_extgstates ) end end + local function checkcolorspaces() if next(d_colorspaces) then addtopageresources("ColorSpace",p_colorspaces) end end + local function checkpatterns () if next(d_patterns ) then addtopageresources("Pattern", p_patterns ) end end + local function checkshades () if next(d_shades ) then addtopageresources("Shading", p_shades ) end end + + local function flushextgstates () if next(d_extgstates ) then trace_flush("extgstates") pdfimmediateobject(r_extgstates, tostring(d_extgstates )) end end + local function flushcolorspaces() if next(d_colorspaces) then trace_flush("colorspaces") pdfimmediateobject(r_colorspaces,tostring(d_colorspaces)) end end + local function flushpatterns () if next(d_patterns ) then trace_flush("patterns") pdfimmediateobject(r_patterns, tostring(d_patterns )) end end + local function flushshades () if next(d_shades ) then trace_flush("shades") pdfimmediateobject(r_shades, tostring(d_shades )) end end + + function lpdf.collectedresources() + local ExtGState = next(d_extgstates ) and p_extgstates + local ColorSpace = next(d_colorspaces) and p_colorspaces + local Pattern = next(d_patterns ) and p_patterns + local Shading = next(d_shades ) and p_shades + if ExtGState or ColorSpace or Pattern or Shading then + local collected = pdfdictionary { + ExtGState = ExtGState, + ColorSpace = ColorSpace, + Pattern = Pattern, + Shading = Shading, + -- 
ProcSet = pdfarray { pdfconstant("PDF") }, + } + return collected() + else + return "" + end end -end -function lpdf.adddocumentextgstate (k,v) d_extgstates [k] = v end -function lpdf.adddocumentcolorspace(k,v) d_colorspaces[k] = v end -function lpdf.adddocumentpattern (k,v) d_patterns [k] = v end -function lpdf.adddocumentshade (k,v) d_shades [k] = v end + function lpdf.adddocumentextgstate (k,v) d_extgstates [k] = v end + function lpdf.adddocumentcolorspace(k,v) d_colorspaces[k] = v end + function lpdf.adddocumentpattern (k,v) d_patterns [k] = v end + function lpdf.adddocumentshade (k,v) d_shades [k] = v end + + registerdocumentfinalizer(flushextgstates,3,"extended graphic states") + registerdocumentfinalizer(flushcolorspaces,3,"color spaces") + registerdocumentfinalizer(flushpatterns,3,"patterns") + registerdocumentfinalizer(flushshades,3,"shades") -registerdocumentfinalizer(flushextgstates,3,"extended graphic states") -registerdocumentfinalizer(flushcolorspaces,3,"color spaces") -registerdocumentfinalizer(flushpatterns,3,"patterns") -registerdocumentfinalizer(flushshades,3,"shades") + registerdocumentfinalizer(flushnames,3,"names") -- before catalog + registerdocumentfinalizer(flushcatalog,3,"catalog") + registerdocumentfinalizer(flushinfo,3,"info") -registerdocumentfinalizer(flushnames,3,"names") -- before catalog -registerdocumentfinalizer(flushcatalog,3,"catalog") -registerdocumentfinalizer(flushinfo,3,"info") + registerpagefinalizer(checkextgstates,3,"extended graphic states") + registerpagefinalizer(checkcolorspaces,3,"color spaces") + registerpagefinalizer(checkpatterns,3,"patterns") + registerpagefinalizer(checkshades,3,"shades") -registerpagefinalizer(checkextgstates,3,"extended graphic states") -registerpagefinalizer(checkcolorspaces,3,"color spaces") -registerpagefinalizer(checkpatterns,3,"patterns") -registerpagefinalizer(checkshades,3,"shades") +end -- in strc-bkm: lpdf.registerdocumentfinalizer(function() structures.bookmarks.place() end,1) @@ 
-927,19 +988,23 @@ end -- ! -> universaltime -local timestamp = os.date("%Y-%m-%dT%X") .. os.timezone(true) +do -function lpdf.timestamp() - return timestamp -end + local timestamp = os.date("%Y-%m-%dT%X") .. os.timezone(true) -function lpdf.pdftimestamp(str) - local Y, M, D, h, m, s, Zs, Zh, Zm = match(str,"^(%d%d%d%d)%-(%d%d)%-(%d%d)T(%d%d):(%d%d):(%d%d)([%+%-])(%d%d):(%d%d)$") - return Y and format("D:%s%s%s%s%s%s%s%s'%s'",Y,M,D,h,m,s,Zs,Zh,Zm) -end + function lpdf.timestamp() + return timestamp + end + + function lpdf.pdftimestamp(str) + local Y, M, D, h, m, s, Zs, Zh, Zm = match(str,"^(%d%d%d%d)%-(%d%d)%-(%d%d)T(%d%d):(%d%d):(%d%d)([%+%-])(%d%d):(%d%d)$") + return Y and format("D:%s%s%s%s%s%s%s%s'%s'",Y,M,D,h,m,s,Zs,Zh,Zm) + end + + function lpdf.id() + return format("%s.%s",tex.jobname,timestamp) + end -function lpdf.id() - return format("%s.%s",tex.jobname,timestamp) end -- return nil is nicer in test prints @@ -1082,25 +1147,29 @@ end -- return formatters["BT /Span << /ActualText (CONTEXT) >> BDC [] TJ % t EMC ET"](code) -local f_actual_text_one = formatters["BT /Span << /ActualText >> BDC [] TJ %s EMC ET"] -local f_actual_text_two = formatters["BT /Span << /ActualText >> BDC [] TJ %s EMC ET"] -local f_actual_text = formatters["/Span <> BDC"] +do -local context = context -local pdfdirect = nodes.pool.pdfdirect + local f_actual_text_one = formatters["BT /Span << /ActualText >> BDC [] TJ %s EMC ET"] + local f_actual_text_two = formatters["BT /Span << /ActualText >> BDC [] TJ %s EMC ET"] + local f_actual_text = formatters["/Span <> BDC"] -function codeinjections.unicodetoactualtext(unicode,pdfcode) - if unicode < 0x10000 then - return f_actual_text_one(unicode,pdfcode) - else - return f_actual_text_two(unicode/1024+0xD800,unicode%1024+0xDC00,pdfcode) + local context = context + local pdfdirect = nodes.pool.pdfdirect + + function codeinjections.unicodetoactualtext(unicode,pdfcode) + if unicode < 0x10000 then + return f_actual_text_one(unicode,pdfcode) + else + 
return f_actual_text_two(unicode/1024+0xD800,unicode%1024+0xDC00,pdfcode) + end end -end -function commands.startactualtext(str) - context(pdfdirect(f_actual_text(tosixteen(str)))) -end + function commands.startactualtext(str) + context(pdfdirect(f_actual_text(tosixteen(str)))) + end + + function commands.stopactualtext() + context(pdfdirect("EMC")) + end -function commands.stopactualtext() - context(pdfdirect("EMC")) end diff --git a/tex/context/base/m-pstricks.mkii b/tex/context/base/m-pstricks.mkii index bdcf13b24..d41f19871 100644 --- a/tex/context/base/m-pstricks.mkii +++ b/tex/context/base/m-pstricks.mkii @@ -43,7 +43,7 @@ {\input multido \relax \input pstricks \relax \input pst-plot \relax - \loadpstrickscolors{colo-rgb}} + \loadpstrickscolors{colo-rgb.mkii}} {\writestatus{pstricks}{using indirect method; enable write18}} \catcode`\|=\oldbarcode diff --git a/tex/context/base/math-fbk.lua b/tex/context/base/math-fbk.lua index 63a0e9f88..7c97249d6 100644 --- a/tex/context/base/math-fbk.lua +++ b/tex/context/base/math-fbk.lua @@ -20,7 +20,6 @@ local virtualcharacters = { } local identifiers = fonts.hashes.identifiers local lastmathids = fonts.hashes.lastmathids -local tounicode16 = fonts.mappings.tounicode16 -- we need a trick (todo): if we define scriptscript, script and text in -- that order we could use their id's .. i.e. 
we could always add a font @@ -346,11 +345,11 @@ local function accent_to_extensible(target,newchr,original,oldchr,height,depth,s end local correction = swap and { "down", (olddata.height or 0) - height } or { "down", olddata.height + (offset or 0)} local newdata = { - commands = { correction, { "slot", 1, oldchr } }, - width = olddata.width, - height = height, - depth = depth, - tounicode = tounicode16(unicode), + commands = { correction, { "slot", 1, oldchr } }, + width = olddata.width, + height = height, + depth = depth, + unicode = unicode, } local glyphdata = newdata local nextglyph = olddata.next @@ -401,9 +400,6 @@ local function accent_to_extensible(target,newchr,original,oldchr,height,depth,s end return glyphdata, true else --- if not olddata.tounicode then --- olddata.tounicode = tounicode16(unicode), --- end return olddata, false end end @@ -448,9 +444,9 @@ addextra(0xFE3DF, { description="EXTENSIBLE OF 0x03DF", unicodeslot=0xFE3DF, mat addextra(0xFE3DD, { description="EXTENSIBLE OF 0x03DD", unicodeslot=0xFE3DD, mathextensible = "r", mathstretch = "h", mathclass = "botaccent" } ) addextra(0xFE3B5, { description="EXTENSIBLE OF 0x03B5", unicodeslot=0xFE3B5, mathextensible = "r", mathstretch = "h", mathclass = "botaccent" } ) -virtualcharacters[0xFE3DF] = function(data) local c = data.target.characters[0x23DF] if c then c.tounicode = tounicode16(0x23DF) return c end end -virtualcharacters[0xFE3DD] = function(data) local c = data.target.characters[0x23DD] if c then c.tounicode = tounicode16(0x23DD) return c end end -virtualcharacters[0xFE3B5] = function(data) local c = data.target.characters[0x23B5] if c then c.tounicode = tounicode16(0x23B5) return c end end +virtualcharacters[0xFE3DF] = function(data) local c = data.target.characters[0x23DF] if c then c.unicode = 0x23DF return c end end +virtualcharacters[0xFE3DD] = function(data) local c = data.target.characters[0x23DD] if c then c.unicode = 0x23DD return c end end +virtualcharacters[0xFE3B5] = 
function(data) local c = data.target.characters[0x23B5] if c then c.unicode = 0x23B5 return c end end -- todo: add some more .. numbers might change @@ -524,7 +520,7 @@ local function actuarian(data) -- todo: add alttext -- compromise: lm has large hooks e.g. \actuarial{a} width = basewidth + 4 * linewidth, - tounicode = tounicode16(0x20E7), + unicode = 0x20E7, commands = { { "right", 2 * linewidth }, { "down", - baseheight - 3 * linewidth }, diff --git a/tex/context/base/math-frc.lua b/tex/context/base/math-frc.lua index 4f531a530..a663fb3ec 100644 --- a/tex/context/base/math-frc.lua +++ b/tex/context/base/math-frc.lua @@ -25,13 +25,13 @@ table.setmetatableindex(resolved, function(t,k) return v end) -local normalatop = context.normalatop -local normalover = context.normalover +local ctx_normalatop = context.normalatop +local ctx_normalover = context.normalover function commands.math_frac(how,left,right,width) if how == v_no then if left == 0x002E and right == 0x002E then - normalatop() + ctx_normalatop() else context("\\atopwithdelims%s%s",resolved[left],resolved[right]) end @@ -43,7 +43,7 @@ function commands.math_frac(how,left,right,width) end else -- v_auto if left == 0x002E and right == 0x002E then - normalover() + ctx_normalover() else context("\\overwithdelims%s%s",resolved[left],resolved[right]) end diff --git a/tex/context/base/math-frc.mkiv b/tex/context/base/math-frc.mkiv index 7032c665b..4e5a35b29 100644 --- a/tex/context/base/math-frc.mkiv +++ b/tex/context/base/math-frc.mkiv @@ -201,24 +201,6 @@ \expandafter\math_fraction_inner_margin \fi} -\def\math_fraction_inner_normal#1#2% - {\Ustack{% - {\usemathstyleparameter\mathfractionparameter{#1}}% we should store this one - \math_frac_command - {\usemathstyleparameter\mathfractionparameter{#2}}% and reuse it here - }\endgroup} - -\def\math_fraction_inner_margin#1#2% - {\Ustack{% - {\kern\d_math_fraction_margin - \usemathstyleparameter\mathfractionparameter{#1}% we should store this one - 
\kern\d_math_fraction_margin}% - \math_frac_command - {\kern\d_math_fraction_margin - \usemathstyleparameter\mathfractionparameter{#2}% and reuse it here - \kern\d_math_fraction_margin}% - }\endgroup} - \setvalue{\??mathfractionalternative\v!outer}% {\ifcase\d_math_fraction_margin \expandafter\math_fraction_outer_normal @@ -226,21 +208,62 @@ \expandafter\math_fraction_outer_margin \fi} +\setvalue{\??mathfractionalternative\v!both}% + {\ifcase\d_math_fraction_margin + \expandafter\math_fraction_both_normal + \else + \expandafter\math_fraction_both_margin + \fi} + +% todo: store first state and reuse second time + +\def\math_fraction_inner_normal#1#2% + {\Ustack{% + {\usemathstyleparameter\mathfractionparameter{#1}}\math_frac_command{\usemathstyleparameter\mathfractionparameter{#2}}% + }\endgroup} + \def\math_fraction_outer_normal#1#2% {\Ustack{% \usemathstyleparameter\mathfractionparameter - {{#1}\math_frac_command{#2}}% + {% + {#1}\math_frac_command{#2}% + }% + }\endgroup} + +\def\math_fraction_both_normal#1#2% + {\Ustack{% + \usemathstyleparameter\mathfractionparameter + {% + \usemathstyleparameter\mathfractionparameter{#1}\math_frac_command{\usemathstyleparameter\mathfractionparameter#2}% + }% + }\endgroup} + +\def\math_fraction_inner_margin#1#2% + {\Ustack{% + {\kern\d_math_fraction_margin\usemathstyleparameter\mathfractionparameter{#1}\kern\d_math_fraction_margin}% + \math_frac_command + {\kern\d_math_fraction_margin\usemathstyleparameter\mathfractionparameter{#2}\kern\d_math_fraction_margin}% }\endgroup} \def\math_fraction_outer_margin#1#2% {\Ustack{% \usemathstyleparameter\mathfractionparameter - {{\kern\d_math_fraction_margin#1\kern\d_math_fraction_margin}% + {% + {\kern\d_math_fraction_margin#1\kern\d_math_fraction_margin}% \math_frac_command - {\kern\d_math_fraction_margin#2\kern\d_math_fraction_margin}}% + {\kern\d_math_fraction_margin#2\kern\d_math_fraction_margin}% + }% }\endgroup} -\definemathfraction[frac][\c!mathstyle=] 
+\def\math_fraction_both_margin#1#2% + {\Ustack{% + \usemathstyleparameter\mathfractionparameter + {% + {\kern\d_math_fraction_margin\usemathstyleparameter\mathfractionparameter#1\kern\d_math_fraction_margin}% + \math_frac_command + {\kern\d_math_fraction_margin\usemathstyleparameter\mathfractionparameter#2\kern\d_math_fraction_margin}% + }% + }\endgroup} \unexpanded\def\xfrac {\begingroup\let\xfrac\xxfrac\math_frac_alternative\scriptstyle} \unexpanded\def\xxfrac{\begingroup \math_frac_alternative\scriptscriptstyle} @@ -277,9 +300,30 @@ % \unexpanded\def\dfrac #1#2{{\displaystyle {{#1}\normalover {#2}}}} % \unexpanded\def\tfrac #1#2{{\textstyle {{#1}\normalover {#2}}}} -\definemathfraction[dfrac][\c!alternative=\v!outer,\c!mathstyle=\s!display] -\definemathfraction[tfrac][\c!alternative=\v!outer,\c!mathstyle=\s!text] -\definemathfraction[sfrac][\c!alternative=\v!outer,\c!mathstyle=\s!script] +% \definemathfraction[frac] [\c!mathstyle=] +% \definemathfraction[dfrac][\c!alternative=\v!outer,\c!mathstyle=\s!display] +% \definemathfraction[tfrac][\c!alternative=\v!outer,\c!mathstyle=\s!text] +% \definemathfraction[sfrac][\c!alternative=\v!outer,\c!mathstyle=\s!script] + +% \definemathfraction[frac] [\c!alternative=\v!inner, \c!mathstyle=\inlineordisplaymath\s!script {\s!cramped,\s!text}] +% \definemathfraction[tfrac][\c!alternative=\inlineordisplaymath\v!inner\v!both,\c!mathstyle=\inlineordisplaymath\s!script {\s!cramped,\s!script}] +% \definemathfraction[sfrac][\c!alternative=\inlineordisplaymath\v!inner\v!both,\c!mathstyle=\inlineordisplaymath\s!scriptscript{\s!cramped,\s!scriptscript}] +% \definemathfraction[dfrac][\c!alternative=\v!inner, \c!mathstyle=\s!display] + +\definemathfraction[i:frac] [\c!alternative=\v!inner,\c!mathstyle=\s!script] +\definemathfraction[i:tfrac][\c!alternative=\v!inner,\c!mathstyle=\s!script] +\definemathfraction[i:sfrac][\c!alternative=\v!inner,\c!mathstyle=\s!scriptscript] 
+\definemathfraction[i:dfrac][\c!alternative=\v!inner,\c!mathstyle=\s!display] + +\definemathfraction[d:frac] [\c!alternative=\v!inner,\c!mathstyle={\s!cramped,\s!text}] +\definemathfraction[d:tfrac][\c!alternative=\v!both ,\c!mathstyle={\s!cramped,\s!script}] +\definemathfraction[d:sfrac][\c!alternative=\v!both ,\c!mathstyle={\s!cramped,\s!scriptscript}] +\definemathfraction[d:dfrac][\c!alternative=\v!inner,\c!mathstyle=\s!display] + +\unexpanded\def\frac {\csname\inlineordisplaymath id:frac\endcsname} +\unexpanded\def\tfrac{\csname\inlineordisplaymath id:tfrac\endcsname} +\unexpanded\def\sfrac{\csname\inlineordisplaymath id:sfrac\endcsname} +\unexpanded\def\dfrac{\csname\inlineordisplaymath id:dfrac\endcsname} % \definemathfraction[ddfrac][\c!mathstyle=\s!display] % \definemathfraction[ttfrac][\c!mathstyle=\s!text] diff --git a/tex/context/base/math-ini.mkiv b/tex/context/base/math-ini.mkiv index a7b2a924c..070b8d246 100644 --- a/tex/context/base/math-ini.mkiv +++ b/tex/context/base/math-ini.mkiv @@ -91,6 +91,20 @@ \unexpanded\def\resetmathattributes{\ctxcommand{resetmathattributes()}} +% handy + +\newconditional\indisplaymath + +\appendtoks + \setfalse\indisplaymath +\to \everymath + +\appendtoks + \settrue\indisplaymath +\to \everydisplay + +\def\inlineordisplaymath{\ifconditional\indisplaymath\expandafter\secondoftwoarguments\else\expandafter\firstoftwoarguments\fi} + % \unexpanded\def\rawmathcharacter#1% slow but only for tracing % {\begingroup % \ifmmode @@ -298,15 +312,15 @@ \def\utfmathcommand#1{\ctxcommand{utfmathcommand(\!!bs#1\!!es)}} \def\utfmathfiller #1{\ctxcommand{utfmathfiller (\!!bs#1\!!es)}} -\def\utfmathclassfiltered #1#2{\ctxcommand{utfmathclass (\!!bs#1\!!es,nil,"#2")}} -\def\utfmathcommandfiltered#1#2{\ctxcommand{utfmathcommand(\!!bs#1\!!es,nil,"#2")}} +\def\utfmathclassfiltered #1#2{\ctxcommand{utfmathclass (\!!bs#1\!!es,nil,\!!bs#2\!!es)}} +\def\utfmathcommandfiltered#1#2{\ctxcommand{utfmathcommand(\!!bs#1\!!es,nil,\!!bs#2\!!es)}} 
\def\utfmathcommandabove #1{\ctxcommand{utfmathcommandabove (\!!bs#1\!!es)}} \def\utfmathcommandbelow #1{\ctxcommand{utfmathcommandbelow (\!!bs#1\!!es)}} \def\utfmathcommandfiller#1{\ctxcommand{utfmathcommandfiller(\!!bs#1\!!es)}} \unexpanded\def\doifelseutfmathaccent #1{\ctxcommand{doifelseutfmathaccent(\!!bs#1\!!es)}} -\unexpanded\def\doifelseutfmathaccentfiltered#1#2{\ctxcommand{doifelseutfmathaccent(\!!bs#1\!!es,"#2")}} +\unexpanded\def\doifelseutfmathaccentfiltered#1#2{\ctxcommand{doifelseutfmathaccent(\!!bs#1\!!es,\!!bs#2\!!es)}} \unexpanded\def\doifelseutfmathabove #1{\ctxcommand{doifelseutfmathabove(\!!bs#1\!!es)}} \unexpanded\def\doifelseutfmathbelow #1{\ctxcommand{doifelseutfmathbelow(\!!bs#1\!!es)}} diff --git a/tex/context/base/math-rad.mkvi b/tex/context/base/math-rad.mkvi index f22d62374..2115ab9a4 100644 --- a/tex/context/base/math-rad.mkvi +++ b/tex/context/base/math-rad.mkvi @@ -30,7 +30,7 @@ \unexpanded\def\sqrt{\doifnextoptionalcselse\rootwithdegree\rootwithoutdegree} -\def\styledrootradical#1#2% so that \text works ok ... \rootradical behaves somewhat weird +\unexpanded\def\styledrootradical#1#2% so that \text works ok ... 
\rootradical behaves somewhat weird {\normalexpanded{\rootradical{\normalunexpanded{#1}}{\noexpand\triggermathstyle{\normalmathstyle}\normalunexpanded{#2}}}} \unexpanded\def\rootwithdegree[#1]{\rootradical{#1}} diff --git a/tex/context/base/math-stc.mkvi b/tex/context/base/math-stc.mkvi index ca39287c5..349664891 100644 --- a/tex/context/base/math-stc.mkvi +++ b/tex/context/base/math-stc.mkvi @@ -988,6 +988,15 @@ \definemathextensible [\v!mathematics] [erightharpoondownfill] ["21C1] \definemathextensible [\v!mathematics] [erightharpoonupfill] ["21C0] +\definemathextensible [\v!mathematics] [eoverbarfill] ["FE33E] +\definemathextensible [\v!mathematics] [eunderbarfill] ["FE33F] +\definemathextensible [\v!mathematics] [eoverbracefill] ["FE3DE] +\definemathextensible [\v!mathematics] [eunderbracefill] ["FE3DF] +\definemathextensible [\v!mathematics] [eoverparentfill] ["FE3DC] +\definemathextensible [\v!mathematics] [eunderparentfill] ["FE3DD] +\definemathextensible [\v!mathematics] [eoverbracketfill] ["FE3B4] +\definemathextensible [\v!mathematics] [eunderbracketfill] ["FE3B5] + \definemathextensible [\v!text] [trel] ["002D] \definemathextensible [\v!text] [tequal] ["003D] \definemathextensible [\v!text] [tmapsto] ["21A6] @@ -1170,6 +1179,15 @@ \defineextensiblefiller [Leftrightarrowfill] ["27FA] \defineextensiblefiller [Leftrightarrowfill] ["27FA] +%defineextensiblefiller [overbarfill] ["FE33E] % untested +%defineextensiblefiller [underbarfill] ["FE33F] % untested +\defineextensiblefiller [overbracefill] ["FE3DE] % untested +\defineextensiblefiller [underbracefill] ["FE3DF] % untested +\defineextensiblefiller [overparentfill] ["FE3DC] % untested +\defineextensiblefiller [underparentfill] ["FE3DD] % untested +\defineextensiblefiller [overbracketfill] ["FE3B4] % untested +\defineextensiblefiller [underbracketfill] ["FE3B5] % untested + %D Extra: \unexpanded\edef\singlebond{\mathematics{\mathsurround\zeropoint\char\number"002D}} diff --git 
a/tex/context/base/math-tag.lua b/tex/context/base/math-tag.lua index 638c4629c..6b555650e 100644 --- a/tex/context/base/math-tag.lua +++ b/tex/context/base/math-tag.lua @@ -141,8 +141,7 @@ local function getunicode(n) -- instead of getchar local char = getchar(n) local font = font_of_family(getfield(n,"fam")) -- font_of_family local data = fontcharacters[font][char] - local unic = data.tounicode - return unic and fromunicode16(unic) or char + return data.unicode or char end process = function(start) -- we cannot use the processor as we have no finalizers (yet) diff --git a/tex/context/base/mult-de.mkii b/tex/context/base/mult-de.mkii index 1751f4780..53f0dfd6a 100644 --- a/tex/context/base/mult-de.mkii +++ b/tex/context/base/mult-de.mkii @@ -653,7 +653,7 @@ \setinterfaceconstant{coupling}{verknuepfung} \setinterfaceconstant{couplingway}{verkopplungsart} \setinterfaceconstant{criterium}{kriterium} -\setinterfaceconstant{css}{css} +\setinterfaceconstant{cssfile}{cssfile} \setinterfaceconstant{current}{aktuell} \setinterfaceconstant{cutspace}{cutspace} \setinterfaceconstant{dash}{strich} diff --git a/tex/context/base/mult-def.lua b/tex/context/base/mult-def.lua index ffb95f76b..b41053dd5 100644 --- a/tex/context/base/mult-def.lua +++ b/tex/context/base/mult-def.lua @@ -6508,8 +6508,8 @@ return { ["export"] = { ["en"]="export", }, - ["css"] = { - ["en"]="css", + ["cssfile"] = { + ["en"]="cssfile", }, ["xhtml"] = { ["en"]="xhtml", diff --git a/tex/context/base/mult-def.mkiv b/tex/context/base/mult-def.mkiv index 5761e6cfb..055f72dc9 100644 --- a/tex/context/base/mult-def.mkiv +++ b/tex/context/base/mult-def.mkiv @@ -45,6 +45,9 @@ \def\c!keeptogether {keeptogether} \def\c!viewerprefix {viewerprefix} +\def\v!display {display} +\def\v!inline {inline} + \def\c!dataset {dataset} \def\c!sectionblock {sectionblock} \def\c!language {language} @@ -96,6 +99,8 @@ \def\v!long {long} \def\v!box {box} +\def\v!bookmark {bookmark} + \def\v!vfenced {vfenced} \def\v!bothtext {bothtext} 
diff --git a/tex/context/base/mult-en.mkii b/tex/context/base/mult-en.mkii index 72185f3ab..346f94cad 100644 --- a/tex/context/base/mult-en.mkii +++ b/tex/context/base/mult-en.mkii @@ -653,7 +653,7 @@ \setinterfaceconstant{coupling}{coupling} \setinterfaceconstant{couplingway}{couplingway} \setinterfaceconstant{criterium}{criterium} -\setinterfaceconstant{css}{css} +\setinterfaceconstant{cssfile}{cssfile} \setinterfaceconstant{current}{current} \setinterfaceconstant{cutspace}{cutspace} \setinterfaceconstant{dash}{dash} diff --git a/tex/context/base/mult-fr.mkii b/tex/context/base/mult-fr.mkii index 2a6d85c91..681c67bbc 100644 --- a/tex/context/base/mult-fr.mkii +++ b/tex/context/base/mult-fr.mkii @@ -653,7 +653,7 @@ \setinterfaceconstant{coupling}{couplage} \setinterfaceconstant{couplingway}{modecouplage} \setinterfaceconstant{criterium}{critere} -\setinterfaceconstant{css}{css} +\setinterfaceconstant{cssfile}{cssfile} \setinterfaceconstant{current}{courant} \setinterfaceconstant{cutspace}{cutspace} \setinterfaceconstant{dash}{pointille} diff --git a/tex/context/base/mult-it.mkii b/tex/context/base/mult-it.mkii index 9eb0139a1..4f9941a04 100644 --- a/tex/context/base/mult-it.mkii +++ b/tex/context/base/mult-it.mkii @@ -653,7 +653,7 @@ \setinterfaceconstant{coupling}{accoppiamento} \setinterfaceconstant{couplingway}{modoaccoppiamento} \setinterfaceconstant{criterium}{criterio} -\setinterfaceconstant{css}{css} +\setinterfaceconstant{cssfile}{cssfile} \setinterfaceconstant{current}{corrente} \setinterfaceconstant{cutspace}{cutspace} \setinterfaceconstant{dash}{dash} diff --git a/tex/context/base/mult-nl.mkii b/tex/context/base/mult-nl.mkii index 881b4f467..771d48c3c 100644 --- a/tex/context/base/mult-nl.mkii +++ b/tex/context/base/mult-nl.mkii @@ -653,7 +653,7 @@ \setinterfaceconstant{coupling}{koppeling} \setinterfaceconstant{couplingway}{koppelwijze} \setinterfaceconstant{criterium}{criterium} -\setinterfaceconstant{css}{css} +\setinterfaceconstant{cssfile}{cssfile} 
\setinterfaceconstant{current}{huidige} \setinterfaceconstant{cutspace}{snijwit} \setinterfaceconstant{dash}{streep} diff --git a/tex/context/base/mult-pe.mkii b/tex/context/base/mult-pe.mkii index 076342282..7fa8bb772 100644 --- a/tex/context/base/mult-pe.mkii +++ b/tex/context/base/mult-pe.mkii @@ -653,7 +653,7 @@ \setinterfaceconstant{coupling}{تزویج} \setinterfaceconstant{couplingway}{روش‌تزویج} \setinterfaceconstant{criterium}{criterium} -\setinterfaceconstant{css}{css} +\setinterfaceconstant{cssfile}{cssfile} \setinterfaceconstant{current}{جاری} \setinterfaceconstant{cutspace}{فضای‌برش} \setinterfaceconstant{dash}{دش} diff --git a/tex/context/base/mult-ro.mkii b/tex/context/base/mult-ro.mkii index ec372c6ba..0ed0df4f1 100644 --- a/tex/context/base/mult-ro.mkii +++ b/tex/context/base/mult-ro.mkii @@ -653,7 +653,7 @@ \setinterfaceconstant{coupling}{cuplare} \setinterfaceconstant{couplingway}{modcuplare} \setinterfaceconstant{criterium}{criteriu} -\setinterfaceconstant{css}{css} +\setinterfaceconstant{cssfile}{cssfile} \setinterfaceconstant{current}{curent} \setinterfaceconstant{cutspace}{cutspace} \setinterfaceconstant{dash}{dash} diff --git a/tex/context/base/node-fin.lua b/tex/context/base/node-fin.lua index bb3800615..1566e099f 100644 --- a/tex/context/base/node-fin.lua +++ b/tex/context/base/node-fin.lua @@ -128,7 +128,7 @@ end local nsdata, nsnone, nslistwise, nsforced, nsselector, nstrigger local current, current_selector, done = 0, 0, false -- nb, stack has a local current ! 
-local nsbegin, nsend +local nsbegin, nsend, nsreset function states.initialize(namespace,attribute,head) nsdata = namespace.data @@ -142,6 +142,7 @@ function states.initialize(namespace,attribute,head) done = false -- todo: done cleanup nsstep = namespace.resolve_step if nsstep then + nsreset = namespace.resolve_reset nsbegin = namespace.resolve_begin nsend = namespace.resolve_end nspush = namespace.push @@ -483,14 +484,17 @@ end -- experimental local function stacker(namespace,attribute,head,default) -- no triggering, no inheritance, but list-wise - nsbegin() + +-- nsbegin() + local stacked = false + local current = head local previous = head local done = false - local okay = false local attrib = default or unsetvalue local check = false local leader = false + while current do local id = getid(current) if id == glyph_code then @@ -507,6 +511,10 @@ local function stacker(namespace,attribute,head,default) -- no triggering, no in elseif nslistwise then local a = getattr(current,attribute) if a and attrib ~= a and nslistwise[a] then -- viewerlayer +-- if not stacked then +-- stacked = true +-- nsbegin() +-- end head = insert_node_before(head,current,copied(nsdata[a])) local list = stacker(namespace,attribute,content,a) setfield(current,"list",list) @@ -529,13 +537,15 @@ local function stacker(namespace,attribute,head,default) -- no triggering, no in if check then local a = getattr(current,attribute) or unsetvalue if a ~= attrib then +if not stacked then + stacked = true + nsbegin() +end local n = nsstep(a) if n then - -- !!!! TEST CODE !!!! 
- -- head = insert_node_before(head,current,copied(nsdata[tonumber(n)])) -- a head = insert_node_before(head,current,tonut(n)) -- a end - attrib, done, okay = a, true, true + attrib, done = a, true if leader then -- tricky as a leader has to be a list so we cannot inject before local list, ok = stacker(namespace,attribute,leader,attrib) @@ -549,19 +559,23 @@ local function stacker(namespace,attribute,head,default) -- no triggering, no in previous = current current = getnext(current) end - if okay then - local n = nsend() - if n then - -- !!!! TEST CODE !!!! - -- head = insert_node_after(head,previous,copied(nsdata[tostring(n)])) - head = insert_node_after(head,previous,tonut(n)) - end + +if stacked then + + local n = nsend() + while n do + head = insert_node_after(head,previous,tonut(n)) + n = nsend() end + +end + return head, done end states.stacker = function(namespace,attribute,head,default) local head, done = stacker(namespace,attribute,tonut(head),default) + nsreset() return tonode(head), done end diff --git a/tex/context/base/page-sid.mkiv b/tex/context/base/page-sid.mkiv index c85565703..4760ca2f6 100644 --- a/tex/context/base/page-sid.mkiv +++ b/tex/context/base/page-sid.mkiv @@ -15,21 +15,18 @@ \unprotect -% These macro deal with side floats. We started with Daniel -% Comenetz macros as published in TUGBoat Volume 14 (1993), -% No.\ 1: Anchored Figures at Either Margin. I extended and -% patched the macros to suite our needs which results in a -% messy module. Therefore, this module badly needs an update -% because it's now a mixture of old and new macros. +% These macro deal with side floats. We started with Daniel Comenetz macros as published +% in TUGBoat Volume 14 (1993), No.\ 1: Anchored Figures at Either Margin. I extended and +% patched the macros to suite our needs which results in a messy module. Therefore, this +% module badly needs an update because it's now a mixture of old and new macros. 
% Interesting cases where it goes wrong: % % \placefigure[left]{}{} \dorecurse{3}{\input ward } {\par} \input ward % -% Here we get an unwanted carried over hangindent and parindent. A -% solution is to associate it with the local par node instead. This -% is something to discuss with Taco as it could be a new luatex -% feature: explicitly set par properties. +% Here we get an unwanted carried over hangindent and parindent. A solution is to associate +% it with the local par node instead. This is something to discuss with Taco as it could be +% a new luatex/mkiv feature: explicitly set par properties. % Maybe I should just rewrite the lot. @@ -223,11 +220,26 @@ \endgroup \ifdim\parskip>\zeropoint % why this test ? \ifdim\d_page_sides_bottomskip>\parskip - \nowhitespace - \vskip\d_page_sides_bottomskip + % \nowhitespace + % \vskip\d_page_sides_bottomskip + \blank[\v!nowhite,\the\dimexpr\d_page_sides_bottomskip] \fi + \else + \blank[\the\d_page_sides_bottomskip]% new, so needs checking \fi} +% alternative method (unsnapped) +% +% \def\page_sides_flush_floats_indeed +% {\scratchdimen\dimexpr\d_page_sides_vsize-\d_page_sides_bottomskip-\pagetotal\relax +% \ifdim\parskip>\zeropoint % why this test ? 
+% \ifdim\scratchdimen>\parskip +% \blank[\v!nowhite,\the\scratchdimen] % better in stages +% \fi +% \else +% \blank[\the\scratchdimen] +% \fi} + \def\page_sides_check_floats_after_par {\page_sides_check_floats_indeed \ifdim\oldpagetotal=\pagetotal \else diff --git a/tex/context/base/publ-dat.lua b/tex/context/base/publ-dat.lua index f35ae2fa9..20f545d8b 100644 --- a/tex/context/base/publ-dat.lua +++ b/tex/context/base/publ-dat.lua @@ -322,7 +322,7 @@ local value = Cs((somevalue * ((spacing * hash * spacing)/"" * somevalue)^0 local forget = percent^1 * (1-lineending)^0 local spacing = spacing * forget^0 * spacing local assignment = spacing * key * spacing * equal * spacing * value * spacing -local shortcut = P("@") * (P("string") + P("STRING")) * spacing * left * ((assignment * Carg(1))/do_shortcut * comma^0)^0 * spacing * right +local shortcut = P("@") * (P("string") + P("STRING") + P("String")) * spacing * left * ((assignment * Carg(1))/do_shortcut * comma^0)^0 * spacing * right local definition = category * spacing * left * spacing * tag * spacing * comma * Ct((assignment * comma^0)^0) * spacing * right * Carg(1) / do_definition local comment = keyword * spacing * left * (1-right)^0 * spacing * right diff --git a/tex/context/base/publ-imp-apa.mkvi b/tex/context/base/publ-imp-apa.mkvi index 38ea0c74f..8732e782e 100644 --- a/tex/context/base/publ-imp-apa.mkvi +++ b/tex/context/base/publ-imp-apa.mkvi @@ -21,6 +21,20 @@ \startbtxrenderingdefinitions[apa] +%D Reference: +%D \startTEX +%D @Book{APA2010, +%D title ={Publication Manual of the American Psychological Association}, +%D year ={2010}, +%D edition ={Sixth}, +%D address ={Washington, DC}, +%D publisher={American Psychological Association}, +%D pages ={291}, +%D url ={http://www.apa.org/books/}, +%D } +%D \stopTEX + + %D In order to get journals expanded (or normalized or abbreviated) you need to load %D a list: %D @@ -35,22 +49,30 @@ [en] [apa:mastersthesis={Master's thesis}, apa:phdthesis={PhD thesis}, - 
apa:technicalreport={Technical report}, + apa:technicalreport={Tech. Rep.}, % Technical report + apa:supplement={Suppl.}, % Supplement apa:patent=patent, - apa:editor=editor, - apa:editors=editors, - apa:edition=edition, + apa:Translator={Trans.}, % Translator(s) + apa:editor={Ed.}, % editor + apa:editors={Eds.}, % editors + apa:edition={ed.}, % edition apa:volume=volume, - apa:Volume=Volume, + apa:Volume={Vol.}, % Volume + apa:Volumes={Vols.}, % Volumes apa:number=number, - apa:Number=Number, + apa:Number={No.}, % Number + apa:nd={n.d.}, % no date apa:in=in, apa:of=of, apa:In=In, + apa:Part={Pt.}, % Part apa:p=p, apa:pp=pp, apa:pages=pages, apa:and=and, + apa:Author=Author, % TODO, should be typeset in italic... + apa:Advanced={Advanced online publication}, + apa:Retrieved={Retrieved from}, apa:others={et al.}] \setupbtxlabeltext @@ -59,8 +81,8 @@ apa:phdthesis={Thèse de doctorat}, apa:technicalreport={Rapport technique}, apa:patent=brevet, - apa:editor=éditeur, - apa:editors=éditeurs, + apa:editor={Éd.}, % éditeur + apa:editors={Éds.}, % éditeurs apa:edition=édition, apa:volume=volume, apa:Volume=Volume, @@ -73,6 +95,9 @@ apa:pp=pp, apa:pages=pages, apa:and=et, + apa:Author=Auteur, + apa:Advanced={Publication en ligne anticipée}, + apa:Retrieved={Téléchargé de}, apa:others={et al.}] \setupbtxlabeltext @@ -95,6 +120,9 @@ apa:pp=S, apa:pages=Seiten, apa:and=und, + apa:Author=Autor, + apa:Advanced={Erweiterte Online-Publikation}, % Check this German! + apa:Retrieved={aus abgerufen}, % heruntergeladen? Check this German! 
apa:others={et al.}] %D The variables control the shared code for which we use a tex definition with @@ -123,39 +151,35 @@ otherwise=\btxperiod] \starttexdefinition btx:apa:wherefrom #field +% TODO: for publisher, if =author use "Author" \btxdoifelse {address} { - \getvariable{btx:apa:\currentbtxcategory}{left} - \btxdoifelse {country} { - \btxflush{address} + \btxspace + \btxflush{address} + \btxdoif {country} { \btxcomma \btxflush{country} - \btxdoif {#field} { - \btxcolon - \btxflush{field} - } - } { - \btxflush{address} - \btxdoif {#field} { - \btxcomma - \btxflush{#field} - } } - \getvariable{btx:apa:\currentbtxcategory}{right} + \btxdoif {#field} { + \btxcolon + \btxflush{#field} + } + \btxperiod } { \btxdoifelse {country} { - \getvariable{btx:apa:\currentbtxcategory}{left} + \btxspace \btxflush{country} \btxdoif {#field} { \btxcolon \btxflush{#field} } - \getvariable{btx:apa:\currentbtxcategory}{right} + \btxperiod } { \btxdoifelse {#field} { \getvariable{btx:apa:#field}{left} \btxflush{#field} \getvariable{btx:apa:#field}{right} } { + % check that this is needed! \getvariable{btx:apa:#field}{otherwise} } } @@ -172,25 +196,27 @@ \starttexdefinition btx:apa:title-and-series \btxdoif {title} { - %btxflush{converters.Word -> title} + \btxspace \btxflush{Word -> title} \btxdoif {series} { - \btxlparent + \btxlparenthesis \btxflush{series} - \btxrparent + \btxrparenthesis } \btxperiod } \stoptexdefinition +% can these two be elegantly collapsed somehow using #it, for example? 
+ \starttexdefinition btx:apa:title-it-and-series \btxdoif {title} { - %texdefinition{btx:apa:italic}{converters.Word -> title} + \btxspace \texdefinition{btx:apa:italic}{Word -> title} \btxdoif {series} { - \btxlparent + \btxlparenthesis \btxflush{series} - \btxrparent + \btxrparenthesis } \btxperiod } @@ -198,19 +224,22 @@ \disablemode[btx:apa:edited-book] % hm, ugly -\starttexdefinition btx:apa:author-and-year - \btxdoif {author} { - \btxflushauthor{author} - } +\starttexdefinition btx:apa:suffixedyear \btxdoif {year} { - \btxlparent - % \btxflush{suffixedyear} + \btxlparenthesis \btxflush{year} \btxdoif {suffix} { \btxflush{suffix} } - \btxrparent + \btxrparenthesis + } +\stoptexdefinition + +\starttexdefinition btx:apa:author-and-year + \btxdoif {author} { + \btxflushauthor{author} } + \texdefinition{btx:apa:suffixedyear} \btxperiod \stoptexdefinition @@ -224,22 +253,15 @@ \btxrbracket } } - \btxdoif {year} { - \btxlparent - % \btxflush{suffixedyear} - \btxflush{year} - \btxdoif {suffix} { - \btxflush{suffix} - } - \btxrparent - } + \texdefinition{btx:apa:suffixedyear} \btxperiod \stoptexdefinition \starttexdefinition btx:apa:author-editors-crossref-year - \btxdoif {author} { + % TODO: if there is no author or editor, then use publisher... 
+ \btxdoifelse {author} { \btxflushauthor{author} - } { + } { \btxdoifelse {editor} { \setmode{btx:apa:edited-book} \btxflushauthor{editor} @@ -259,15 +281,7 @@ } } } - \btxdoif {year} { - \btxlparent - % \btxflush{suffixedyear} - \btxflush{year} - \btxdoif {suffix} { - \btxflush{suffix} - } - \btxrparent - } + \texdefinition{btx:apa:suffixedyear} \btxperiod \stoptexdefinition @@ -288,21 +302,58 @@ \btxrbracket } } - \btxspace - \btxdoif {year} { - \btxlparent - % \btxflush{suffixedyear} - \btxflush{year} - \btxdoif {suffix} { - \btxflush{suffix} + \texdefinition{btx:apa:suffixedyear} + \btxperiod +\stoptexdefinition + +% No longer used (not conforming to APA style) +\starttexdefinition btx:apa:title-it + \btxdoif {title} { + \btxspace + \texdefinition{btx:apa:italic}{Word -> title} + \btxflush{Word -> title} + \btxperiod + } +\stoptexdefinition + +\starttexdefinition btx:apa:journal-volume-issue + \btxdoifelse {journal} { + % expandedjournal abbreviatedjournal + \texdefinition{btx:apa:italic}{expandedjournal -> journal} + } { + \btxdoif {crossref} { + \btxlabeltext{apa:In} + \btxspace + \btxflush{crossref} + } + } + \btxdoifelse {volume} { + \btxspace + \texdefinition{btx:apa:italic}{volume} + \btxdoif {issue} { + \btxlparenthesis + \btxflush{issue} + \btxrparenthesis + } + \btxcomma + } { + \btxdoifelse {doi} { + \btxspace + \btxlabeltext{apa:Advanced} + \btxperiod + } { + \btxdoif {url} { + \btxspace + \btxlabeltext{apa:Advanced} + \btxperiod + } } - \btxrparent } - \btxperiod \stoptexdefinition \starttexdefinition btx:apa:note \btxdoif {note} { + % Note: no punctuation \btxspace \btxflush{note} \btxperiod @@ -311,18 +362,48 @@ \starttexdefinition btx:apa:comment \btxdoif {comment} { + % Note: no punctuation \btxspace \btxflush{comment} \btxperiod } \stoptexdefinition -\starttexdefinition btx:apa:pages:p +% if interaction, we should make these active hyperlinks! 
+ +\starttexdefinition btx:apa:doi-or-url + \btxdoifelse {doi} { + \btxspace + \hyphenatedurl{doi:\btxflush{doi}} + } { + \btxdoif {url} { + \btxspace + \btxlabeltext{apa:Retrieved} + \btxspace + \hyphenatedurl{\btxflush{url}} + } + } +\stoptexdefinition + +\starttexdefinition btx:apa:pages \btxdoif {pages} { \btxspace \btxflush{pages} + \btxperiod + } +\stoptexdefinition + +\starttexdefinition btx:apa:pages:p + \btxdoif {pages} { \btxspace - \btxlabeltext{apa:p} + \btxoneorrange {pages} { + \btxlabeltext{apa:p} + } { + \btxlabeltext{apa:pp} + } + \btxperiod + \btxnbsp + \btxflush{pages} \btxperiod } \stoptexdefinition @@ -331,22 +412,25 @@ \btxdoif {pages} { \btxspace \btxflush{pages} - \btxspace + \btxnbsp \btxlabeltext{apa:pp} \btxperiod } \stoptexdefinition +% this does not seem to comply with APA style - need to verify! + \starttexdefinition btx:apa:pages:pages \btxdoif {pages} { \btxcomma \btxlabeltext{apa:pages} \btxnbsp \btxflush{pages} + \btxperiod } \stoptexdefinition -\starttexdefinition btx:apa:edition:sentense +\starttexdefinition btx:apa:edition:sentence \btxdoif {edition} { \btxspace \btxflush{edition} @@ -371,37 +455,10 @@ \startsetups btx:apa:article \texdefinition{btx:apa:author-or-key-and-year} - \btxdoif {title} { - %btxflush{converters.Word -> title} - \btxflush{Word -> title} - \btxperiod - } - \btxdoifelse {journal} { - % expandedjournal abbreviatedjournal - \texdefinition{btx:apa:italic}{expandedjournal -> journal} - } { - \btxdoif {crossref} { - \btxlabeltext{apa:In} - \btxspace - \btxflush{crossref} - } - } - \btxdoifelse {volume} { - \btxspace - \texdefinition{btx:apa:italic}{volume} - \btxdoif {issue} { - \btxlparent - \btxflush{issue} - \btxrparent - } - \btxdoif {pages} { - \btxcomma - \btxflush{pages} - } - \btxperiod - } { - \texdefinition{btx:apa:pages:pp} - } + \texdefinition{btx:apa:title} + \texdefinition{btx:apa:journal-volume-issue} + \texdefinition{btx:apa:pages} + \texdefinition{btx:apa:doi-or-url} 
\texdefinition{btx:apa:note} \texdefinition{btx:apa:comment} \stopsetups @@ -481,7 +538,8 @@ } \texdefinition{btx:apa:edition:sentence} \texdefinition{btx:apa:wherefrom}{publisher} - \texdefinition{btx:apa:pages:p}% twice? + \texdefinition{btx:apa:pages:pp}% twice? + \texdefinition{btx:apa:doi-or-url} \texdefinition{btx:apa:note} \stopsetups @@ -775,6 +833,7 @@ \btxcomma \texdefinition{btx:apa:wherefrom}{institution} \texdefinition{btx:apa:pages:p} + \texdefinition{btx:apa:doi-or-url} \texdefinition{btx:apa:note} \stopsetups @@ -812,7 +871,7 @@ \texdefinition{btx:apa:title-and-series} \texdefinition{btx:apa:pages:p} \btxdoif {type} { - \btxlparent + \btxlparenthesis \btxflush{type} \btxrparent } diff --git a/tex/context/base/publ-ini.lua b/tex/context/base/publ-ini.lua index 63da84576..018126573 100644 --- a/tex/context/base/publ-ini.lua +++ b/tex/context/base/publ-ini.lua @@ -134,7 +134,11 @@ statistics.register("publications load time", function() local nofbytes = publicationsstats.nofbytes if nofbytes > 0 then return string.format("%s seconds, %s bytes, %s definitions, %s shortcuts", - statistics.elapsedtime(publications),nofbytes,publicationsstats.nofdefinitions,publicationsstats.nofshortcuts) + statistics.elapsedtime(publications), + nofbytes, + publicationsstats.nofdefinitions or 0, + publicationsstats.nofshortcuts or 0 + ) else return nil end @@ -562,7 +566,7 @@ function commands.oneorrange(dataset,tag,name) commands.doifelse(not d) -- so singular is default end -function commands.firstinrange(dataset,tag,name) +function commands.firstofrange(dataset,tag,name) local d = datasets[dataset].luadata[tag] -- details ? 
if d then d = d[name] diff --git a/tex/context/base/publ-ini.mkiv b/tex/context/base/publ-ini.mkiv index 46fe46543..161934384 100644 --- a/tex/context/base/publ-ini.mkiv +++ b/tex/context/base/publ-ini.mkiv @@ -229,6 +229,7 @@ \definebtxdataset [\v!standard] +% [\c!language=] % nothing set so use current % \usebtxdataset % [standard] @@ -346,8 +347,10 @@ \unexpanded\def\btxcomma {\removeunwantedspaces,\space} \unexpanded\def\btxcolon {\removeunwantedspaces:\space} \unexpanded\def\btxsemicolon {\removeunwantedspaces;\space} -\unexpanded\def\btxlparent {\removeunwantedspaces\space(} -\unexpanded\def\btxrparent {\removeunwantedspaces)\space} +\unexpanded\def\btxlparent {\removeunwantedspaces\space(} % obsolete +\unexpanded\def\btxrparent {\removeunwantedspaces)\space} % obsolete +\unexpanded\def\btxlparenthesis{\removeunwantedspaces\space(} +\unexpanded\def\btxrparenthesis{\removeunwantedspaces)\space} \unexpanded\def\btxlbracket {\removeunwantedspaces\space[} \unexpanded\def\btxrbracket {\removeunwantedspaces]\space} @@ -589,6 +592,7 @@ % \determinelistcharacteristics[\currentbtxrendering]% \btx_set_rendering_alternative \edef\currentbtxdataset{\btxrenderingparameter\c!dataset}% + \uselanguageparameter\btxdatasetparameter % new \let\currentlist\s!btx \let\currentbtxlist\currentbtxrendering \the\everysetupbtxlistplacement @@ -984,6 +988,7 @@ \def\publ_cite_handle_variant_indeed[#1]% {\usebtxcitevariantstyleandcolor\c!style\c!color + \uselanguageparameter\btxdatasetparameter % new \letbtxcitevariantparameter\c!alternative\currentbtxcitevariant \btxcitevariantparameter\v!left \ctxcommand{btxhandlecite{% @@ -1060,8 +1065,8 @@ %D Whatever helpers: \unexpanded\def\btxsingularplural#1{\ctxcommand{btxsingularorplural("\currentbtxdataset","\currentbtxtag","#1")}} -\unexpanded\def\btxoneorrange #1{\ctxcommand{btxoneorrange("\currentbtxdataset","\currentbtxtag","#1")}} -\unexpanded\def\btxfirstofrange #1{\ctxcommand{btxfirstofrange("\currentbtxdataset","\currentbtxtag","#1")}} 
+\unexpanded\def\btxoneorrange #1{\ctxcommand{oneorrange("\currentbtxdataset","\currentbtxtag","#1")}} +\unexpanded\def\btxfirstofrange #1{\ctxcommand{firstofrange("\currentbtxdataset","\currentbtxtag","#1")}} \let\btxsingularorplural\btxsingularplural diff --git a/tex/context/base/regi-ini.lua b/tex/context/base/regi-ini.lua index 9484db7c7..c0a23cf42 100644 --- a/tex/context/base/regi-ini.lua +++ b/tex/context/base/regi-ini.lua @@ -15,7 +15,7 @@ runtime.

local commands, context = commands, context local utfchar = utf.char -local P, Cs, lpegmatch = lpeg.P, lpeg.Cs, lpeg.match +local P, Cs, Cc, lpegmatch = lpeg.P, lpeg.Cs, lpeg.Cc, lpeg.match local char, gsub, format, gmatch, byte, match = string.char, string.gsub, string.format, string.gmatch, string.byte, string.match local next = next local insert, remove, fastcopy = table.insert, table.remove, table.fastcopy @@ -99,6 +99,8 @@ local synonyms = { -- backward compatibility list ["windows"] = "cp1252", + ["pdf"] = "pdfdoc", + } local currentregime = "utf" @@ -132,7 +134,7 @@ end setmetatableindex(mapping, loadregime) setmetatableindex(backmapping,loadreverse) -local function translate(line,regime) +local function fromregime(regime,line) if line and #line > 0 then local map = mapping[regime and synonyms[regime] or regime or currentregime] if map then @@ -178,12 +180,15 @@ local function toregime(vector,str,default) -- toregime('8859-1',"abcde Ä","?") local r = c[d] if not r then local t = fastcopy(backmapping[vector]) - setmetatableindex(t, function(t,k) - local v = d - t[k] = v - return v - end) - r = utf.remapper(t) + -- r = utf.remapper(t) -- not good for defaults here + local pattern = Cs((lpeg.utfchartabletopattern(t)/t + lpeg.patterns.utf8character/d + P(1)/d)^0) + r = function(str) + if not str or str == "" then + return "" + else + return lpegmatch(pattern,str) + end + end c[d] = r end return r(str) @@ -204,10 +209,11 @@ local function enable(regime) end end -regimes.toregime = toregime -regimes.translate = translate -regimes.enable = enable -regimes.disable = disable +regimes.toregime = toregime +regimes.fromregime = fromregime +regimes.translate = function(str,regime) return fromregime(regime,str) end +regimes.enable = enable +regimes.disable = disable -- The following function can be used when we want to make sure that -- utf gets passed unharmed. This is needed for modules. 
@@ -216,7 +222,7 @@ local level = 0 function regimes.process(str,filename,currentline,noflines,coding) if level == 0 and coding ~= "utf-8" then - str = translate(str,currentregime) + str = fromregime(currentregime,str) if trace_translating then report_translating("utf: %s",str) end @@ -403,5 +409,5 @@ end -- local new = regimes.cleanup("cp1252",old) -- report_translating("%s -> %s",old,new) -- local old = "Pozn" .. char(0xE1) .. "mky" --- local new = translate(old,"cp1250") +-- local new = fromregime("cp1250",old) -- report_translating("%s -> %s",old,new) diff --git a/tex/context/base/regi-pdfdoc.lua b/tex/context/base/regi-pdfdoc.lua new file mode 100644 index 000000000..363d3ae0d --- /dev/null +++ b/tex/context/base/regi-pdfdoc.lua @@ -0,0 +1,26 @@ +if not modules then modules = { } end modules ['regi-pdfdoc'] = { + version = 1.001, + comment = "companion to regi-ini.mkiv", + author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", + copyright = "PRAGMA ADE / ConTeXt Development Team", + license = "see context related readme files" +} + +return { [0] = + 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x000A, 0x000B, 0x000C, 0x000D, 0x000E, 0x000F, 0x0010, + 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, 0x02D8, 0x02C7, 0x02C6, 0x02D9, 0x02DD, 0x02DB, 0x02DA, 0x02DC, 0x001F, + 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x0029, 0x002A, 0x002B, 0x002C, 0x002D, 0x002E, 0x002F, + 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x0039, 0x003A, 0x003B, 0x003C, 0x003D, 0x003E, 0x003F, + 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, + 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x005B, 0x005C, 0x005D, 0x005E, 0x005F, + 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 0x0068, 0x0069, 0x006A, 0x006B, 0x006C, 0x006D, 0x006E, 0x006F, + 0x0070, 0x0071, 0x0072, 
0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 0x0078, 0x0079, 0x007A, 0x007B, 0x007C, 0x007D, 0x007E, 0x007F, + 0x2022, 0x2020, 0x2021, 0x2026, 0x2014, 0x2013, 0x0192, 0x2044, 0x2039, 0x203A, 0x2212, 0x2030, 0x201E, 0x201C, 0x201D, 0x2018, + 0x2019, 0x201A, 0x2122, 0xFB01, 0xFB02, 0x0141, 0x0152, 0x0160, 0x0178, 0x017D, 0x0131, 0x0142, 0x0153, 0x0161, 0x017E, 0x009F, + 0x20AC, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7, 0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0xFFFD, 0x00AE, 0x00AF, + 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7, 0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF, + 0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7, 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF, + 0x00D0, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7, 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF, + 0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7, 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF, + 0x00F0, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7, 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x00FF, +} diff --git a/tex/context/base/scrp-ini.lua b/tex/context/base/scrp-ini.lua index a6bfe4cf9..fa2bc771f 100644 --- a/tex/context/base/scrp-ini.lua +++ b/tex/context/base/scrp-ini.lua @@ -475,18 +475,19 @@ function scripts.injectors.handler(head) normal_process = handler.injector end if normal_process then + -- wrong: originals are indices ! 
local f = getfont(start) if f ~= lastfont then originals = fontdata[f].resources if resources then originals = resources.originals else - -- can't happen + originals = nil -- can't happen end lastfont = f end local c = getchar(start) - if originals then + if originals and type(originals) == "number" then c = originals[c] or c end local h = hash[c] diff --git a/tex/context/base/status-files.pdf b/tex/context/base/status-files.pdf index 540ac5255..f4d86b843 100644 Binary files a/tex/context/base/status-files.pdf and b/tex/context/base/status-files.pdf differ diff --git a/tex/context/base/status-lua.pdf b/tex/context/base/status-lua.pdf index 0b92a158c..9ee869ef6 100644 Binary files a/tex/context/base/status-lua.pdf and b/tex/context/base/status-lua.pdf differ diff --git a/tex/context/base/strc-bkm.lua b/tex/context/base/strc-bkm.lua index c38ab3c2e..848fe8b93 100644 --- a/tex/context/base/strc-bkm.lua +++ b/tex/context/base/strc-bkm.lua @@ -13,7 +13,9 @@ if not modules then modules = { } end modules ['strc-bkm'] = { -- we should hook the placement into everystoptext ... 
needs checking -local format, concat, gsub = string.format, table.concat, string.gsub +-- todo: make an lpeg for stripped + +local format, concat, gsub, lower = string.format, table.concat, string.gsub, string.lower local utfvalues = utf.values local settings_to_hash = utilities.parsers.settings_to_hash @@ -101,54 +103,6 @@ function bookmarks.setup(spec) end end --- function bookmarks.place() --- if next(names) then --- local list = lists.filtercollected(names,"all",nil,lists.collected,forced) --- if #list > 0 then --- local levels, noflevels, lastlevel = { }, 0, 1 --- for i=1,#list do --- local li = list[i] --- local metadata = li.metadata --- local name = metadata.name --- if not metadata.nolist or forced[name] then -- and levelmap[name] then --- local titledata = li.titledata --- if titledata then --- local structural = levelmap[name] --- lastlevel = structural or lastlevel --- local title = titledata.bookmark --- if not title or title == "" then --- -- We could typeset the title and then convert it. --- if not structural then --- -- placeholder, todo: bookmarklabel --- title = name .. ": " .. (titledata.title or "?") --- else --- title = titledata.title or "?" --- end --- end --- if numbered[name] then --- local sectiondata = sections.collected[li.references.section] --- local numberdata = li.numberdata --- if sectiondata and numberdata and not numberdata.hidenumber then --- -- we could typeset the number and convert it --- title = concat(sections.typesetnumber(sectiondata,"direct",numberspec,sectiondata)) .. " " .. 
title --- end --- end --- noflevels = noflevels + 1 --- levels[noflevels] = { --- lastlevel, --- stripped(title), -- can be replaced by converter --- li.references, -- has internal and realpage --- allopen or opened[name] --- } --- end --- end --- end --- bookmarks.finalize(levels) --- end --- function bookmarks.place() end -- prevent second run --- end --- end - function bookmarks.place() if next(names) then local levels = { } @@ -172,11 +126,14 @@ function bookmarks.place() -- add block entry local blockdata = sections.sectionblockdata[block] noflevels = noflevels + 1 + local references = li.references levels[noflevels] = { - 1, -- toplevel - stripped(blockdata.bookmark ~= "" and blockdata.bookmark or block), - li.references, - allopen or opened[name] -- same as first entry + level = 1, -- toplevel + title = stripped(blockdata.bookmark ~= "" and blockdata.bookmark or block), + reference = references, + opened = allopen or opened[name], -- same as first entry + realpage = references and references.realpage or 0, -- handy for later + usedpage = true, } end blockdone = true @@ -206,11 +163,14 @@ function bookmarks.place() end end noflevels = noflevels + 1 + local references = li.references levels[noflevels] = { - lastlevel, - stripped(title), -- can be replaced by converter - li.references, -- has internal and realpage - allopen or opened[name] + level = lastlevel, + title = stripped(title), -- can be replaced by converter + reference = references, -- has internal and realpage + opened = allopen or opened[name], + realpage = references and references.realpage or 0, -- handy for later + usedpage = true, } end end @@ -222,43 +182,238 @@ function bookmarks.place() end function bookmarks.flatten(levels) + if not levels then + -- a plugin messed up + return { } + end -- This function promotes leading structurelements with a higher level -- to the next lower level. Such situations are the result of lack of -- structure: a subject preceding a chapter in a sectionblock. 
So, the -- following code runs over section blocks as well. (bookmarks-007.tex) local noflevels = #levels if noflevels > 1 then - local skip, start, one = false, 1, levels[1] - local first, block = one[1], one[3].block + local skip = false + local start = 1 + local one = levels[1] + local first = one.level + local block = one.reference.block for i=2,noflevels do - local li = levels[i] - local new, newblock = li[1], li[3].block + local current = levels[i] + local new = current.level + local reference = current.reference + local newblock = type(reference) == "table" and current.reference.block or block if newblock ~= block then - first, block, start, skip = new, newblock, i, false + first = new + block = newblock + start = i + skip = false elseif skip then -- go on elseif new > first then skip = true elseif new < first then for j=start,i-1 do - local lj = levels[j] - local old = lj[1] - lj[1] = new + local previous = levels[j] + local old = previous.level + previous.level = new if trace_bookmarks then - report_bookmarks("promoting entry %a from level %a to %a: %s",j,old,new,lj[2]) + report_bookmarks("promoting entry %a from level %a to %a: %s",j,old,new,previous.title) end end skip = true end end end + return levels +end + +local extras = { } +local lists = { } +local names = { } + +bookmarks.extras = extras + +local function cleanname(name) + return lower(file.basename(name)) +end + +function extras.register(name,levels) + if name and levels then + name = cleanname(name) + local found = names[name] + if found then + lists[found].levels = levels + else + lists[#lists+1] = { + name = name, + levels = levels, + } + names[name] = #lists + end + end +end + +function extras.get(name) + if name then + local found = names[cleanname(name)] + if found then + return lists[found].levels + end + else + return lists + end +end + +function extras.reset(name) + local l, n = { }, { } + if name then + name = cleanname(name) + for i=1,#lists do + local li = lists[i] + local ln = 
li.name + if name == ln then + -- skip + else + local m = #l + 1 + l[m] = li + n[ln] = m + end + end + end + lists, names = l, n +end + +local function checklists() + for i=1,#lists do + local levels = lists[i].levels + for j=1,#levels do + local entry = levels[j] + local pageindex = entry.pageindex + if pageindex then + entry.reference = figures.getrealpage(pageindex) + entry.pageindex = nil + end + end + end +end + +function extras.tosections(levels) + local sections = { } + local noflists = #lists + for i=1,noflists do + local levels = lists[i].levels + local data = { } + sections[i] = data + for j=1,#levels do + local entry = levels[j] + if entry.usedpage then + local section = entry.section + local d = data[section] + if d then + d[#d+1] = entry + else + data[section] = { entry } + end + end + end + end + return sections +end + +function extras.mergesections(levels,sections) + if not sections or #sections == 0 then + return levels + elseif not levels then + return { } + else + local merge = { } + local noflists = #lists + if #levels == 0 then + local level = 0 + local section = 0 + for i=1,noflists do + local entries = sections[i][0] + if entries then + for i=1,#entries do + local entry = entries[i] + merge[#merge+1] = entry + entry.level = entry.level + level + end + end + end + else + for j=1,#levels do + local entry = levels[j] + merge[#merge+1] = entry + local section = entry.reference.section + local level = entry.level + entry.section = section -- for tracing + for i=1,noflists do + local entries = sections[i][section] + if entries then + for i=1,#entries do + local entry = entries[i] + merge[#merge+1] = entry + entry.level = entry.level + level + end + end + end + end + end + return merge + end +end + +function bookmarks.merge(levels,mode) + return extras.mergesections(levels,extras.tosections()) end +local sequencers = utilities.sequencers +local appendgroup = sequencers.appendgroup +local appendaction = sequencers.appendaction + +local bookmarkactions 
= sequencers.new { + arguments = "levels,method", + returnvalues = "levels", + results = "levels", +} + +appendgroup(bookmarkactions,"before") -- user +appendgroup(bookmarkactions,"system") -- private +appendgroup(bookmarkactions,"after" ) -- user + +appendaction(bookmarkactions,"system",bookmarks.flatten) +appendaction(bookmarkactions,"system",bookmarks.merge) + function bookmarks.finalize(levels) - -- This function can be overloaded by an optional converter - -- that uses nodes.toutf on a typeset stream. This is something - -- that we will support when the main loop has become a coroutine. - codeinjections.addbookmarks(levels,bookmarks.method) + local method = bookmarks.method or "internal" + checklists() -- so that plugins have the adapted page number + levels = bookmarkactions.runner(levels,method) + if levels and #levels > 0 then + -- normally this is not needed + local purged = { } + for i=1,#levels do + local l = levels[i] + if l.usedpage ~= false then + purged[#purged+1] = l + end + end + -- + codeinjections.addbookmarks(purged,method) + else + -- maybe a plugin messed up + end +end + +function bookmarks.installhandler(what,where,func) + if not func then + where, func = "after", where + end + if where == "before" or where == "after" then + sequencers.appendaction(bookmarkactions,where,func) + else + report_tex("installing bookmark %a handlers in %a is not possible",what,tostring(where)) + end end -- interface diff --git a/tex/context/base/strc-bkm.mkiv b/tex/context/base/strc-bkm.mkiv index 9d2ebd796..5f1acb686 100644 --- a/tex/context/base/strc-bkm.mkiv +++ b/tex/context/base/strc-bkm.mkiv @@ -127,6 +127,38 @@ }}% \to \everysetupbookmark +%D There is a plugin mechanism but this is for experts only. The intermediate +%D data structures are stable. 
+%D +%D \starttyping +%D \startluacode +%D structures.bookmarks.installhandler("check before","before",function(levels) +%D logs.report("extra bookmarks","before (normal bookmarks)") +%D inspect(levels) +%D logs.report("extra bookmarks","before (extra bookmarks)") +%D inspect(structures.bookmarks.extras.get()) +%D return levels +%D end) +%D structures.bookmarks.installhandler("check after", "after", function(levels) +%D logs.report("extra bookmarks","after (merged bookmarks)") +%D inspect(levels) +%D return levels +%D end) +%D \stopluacode +%D \stoptyping +%D +%D This mechanism was added when bookmark inclusion became (optional) part of graphic +%D inclusion (which is needed by Taco). +%D +%D \starttyping +%D \getfiguredimensions[somefile.pdf] +%D \dorecurse {\noffigurepages} { +%D \startTEXpage +%D \externalfigure[somefile.pdf][interaction=bookmark,page=\recurselevel] +%D \stopTEXpage +%D } +%D \stoptyping + \protect \endinput % \starttext diff --git a/tex/context/base/strc-doc.lua b/tex/context/base/strc-doc.lua index 38830a4e7..7d3be1620 100644 --- a/tex/context/base/strc-doc.lua +++ b/tex/context/base/strc-doc.lua @@ -136,20 +136,27 @@ function sections.currentid() return #tobesaved end +local lastsaved = 0 + function sections.save(sectiondata) -- local sectionnumber = helpers.simplify(section.sectiondata) -- maybe done earlier local numberdata = sectiondata.numberdata local ntobesaved = #tobesaved if not numberdata or sectiondata.metadata.nolist then - return ntobesaved + -- stay else ntobesaved = ntobesaved + 1 tobesaved[ntobesaved] = numberdata if not collected[ntobesaved] then collected[ntobesaved] = numberdata end - return ntobesaved end + lastsaved = ntobesaved + return ntobesaved +end + +function sections.currentsectionindex() + return lastsaved -- only for special controlled situations end function sections.load() diff --git a/tex/context/base/strc-ref.lua index 8a2a668c0..fb1c98c32 100644 --- 
a/tex/context/base/strc-ref.lua +++ b/tex/context/base/strc-ref.lua @@ -2179,6 +2179,8 @@ local function setreferencerealpage(actions) end end +references.setreferencerealpage = setreferencerealpage + -- we store some analysis data alongside the indexed array -- at this moment only the real reference page is analyzed -- normally such an analysis happens in the backend code diff --git a/tex/context/base/strc-ren.mkiv b/tex/context/base/strc-ren.mkiv index fdf8fb7f4..baeb3ab04 100644 --- a/tex/context/base/strc-ren.mkiv +++ b/tex/context/base/strc-ren.mkiv @@ -516,6 +516,11 @@ \fi \endgroup} +\def\fakedheadnumber{\vphantom{0}} % needed for mathplus + +\unexpanded\def\fakeheadnumbercontent + {\hbox to \zeropoint{\let\getheadnumber\fakedheadnumber\headnumbercontent}} + \unexpanded\def\strc_rendering_inject_number_and_text {\edef\p_command{\headparameter\c!command}% assumes \unexpanded definition \ifx\p_command\empty @@ -608,6 +613,7 @@ \hsize\headtextwidth \fi \noindent + \fakeheadnumbercontent % will also be done in the other ones (force consistency with numbered) \fi \headtextcontent } @@ -637,6 +643,8 @@ \hskip\dimexpr\d_strc_rendering_local_leftoffset+\scratchdistance\relax } } + \else + \fakeheadnumbercontent % will also be done in the other ones (force consistency with numbered) \fi \headtextcontent } @@ -665,6 +673,8 @@ \strut \headnumbercontent \par + \else + \fakeheadnumbercontent % will also be done in the other ones (force consistency with numbered) \fi \begstrut \headtextcontent diff --git a/tex/context/base/supp-box.lua b/tex/context/base/supp-box.lua index 3c5a3383d..c69486306 100644 --- a/tex/context/base/supp-box.lua +++ b/tex/context/base/supp-box.lua @@ -42,9 +42,11 @@ local setfield = nuts.setfield local setbox = nuts.setbox local free_node = nuts.free -local copy_list = nuts.copy_list +local flush_list = nuts.flush_list local copy_node = nuts.copy +local copy_list = nuts.copy_list local find_tail = nuts.tail +local traverse_id = nuts.traverse_id 
local listtoutf = nodes.listtoutf @@ -84,6 +86,19 @@ end commands.hyphenatedlist = hyphenatedlist +-- local function hyphenatedhack(head,pre) +-- pre = tonut(pre) +-- for n in traverse_id(disc_code,tonut(head)) do +-- local hyphen = getfield(n,"pre") +-- if hyphen then +-- flush_list(hyphen) +-- end +-- setfield(n,"pre",copy_list(pre)) +-- end +-- end +-- +-- commands.hyphenatedhack = hyphenatedhack + function commands.showhyphenatedinlist(list) report_hyphenation("show: %s",listtoutf(tonut(list),false,true)) end diff --git a/tex/context/base/supp-box.mkiv b/tex/context/base/supp-box.mkiv index 66f373b72..bc1e30749 100644 --- a/tex/context/base/supp-box.mkiv +++ b/tex/context/base/supp-box.mkiv @@ -1063,7 +1063,7 @@ %D \showhyphens{dohyphenatedword} %D \stoptyping -\def\doshowhyphenatednextbox +\unexpanded\def\doshowhyphenatednextbox {\ctxcommand{showhyphenatedinlist(tex.box[\number\nextbox].list)}} \unexpanded\def\showhyphens{\dowithnextboxcs\doshowhyphenatednextbox\hbox} @@ -1076,7 +1076,7 @@ %D \hyphenatedfile{tufte} %D \stoptyping -\def\dohyphenatednextbox +\unexpanded\def\dohyphenatednextbox {\ctxcommand{hyphenatedlist(tex.box[\number\nextbox].list)}% \unhbox\nextbox} @@ -1084,6 +1084,20 @@ \unexpanded\def\hyphenatedpar {\dowithnextboxcs\dohyphenatednextbox\hbox} \unexpanded\def\hyphenatedfile#1{\dowithnextboxcs\dohyphenatednextbox\hbox{\readfile{#1}\donothing\donothing}} +% D \starttyping +% D \hyphenatedhack{\kern-.25em_}{alongword} +% D \stoptyping +% +% \unexpanded\def\dohyphenatedhackbox +% {\ctxcommand{hyphenatedhack(tex.box[\number\nextbox].list,tex.box[\number\scratchbox].list)}% +% \unhbox\nextbox +% \endgroup} +% +% \unexpanded\def\hyphenatedhack#1% the result of a test, not that useful +% {\begingroup +% \setbox\scratchbox\hbox{#1}% only chars and kerns ! 
+% \dowithnextboxcs\dohyphenatedhackbox\hbox} + %D \macros %D {processtokens} %D diff --git a/tex/context/base/syst-ini.mkiv b/tex/context/base/syst-ini.mkiv index ff74efecc..29a97b6bd 100644 --- a/tex/context/base/syst-ini.mkiv +++ b/tex/context/base/syst-ini.mkiv @@ -1020,7 +1020,7 @@ %D \PDFTEX, we default to \DVI. Why? \pdfoutput \zerocount -\pdfminorversion \plussix +\pdfminorversion \plusseven \pdfgentounicode \plusone \pdfinclusioncopyfonts \plusone \pdfinclusionerrorlevel \zerocount diff --git a/tex/context/base/typo-dig.lua b/tex/context/base/typo-dig.lua index 67849c6d4..f5b8a6ddd 100644 --- a/tex/context/base/typo-dig.lua +++ b/tex/context/base/typo-dig.lua @@ -103,15 +103,14 @@ end actions[1] = function(head,start,attr) local font = getfont(start) local char = getchar(start) - local unic = chardata[font][char].tounicode - local what = unic and tonumber(unic,16) or char - if charbase[what].category == "nd" then + local unic = chardata[font][char].unicode or char + if charbase[unic].category == "nd" then -- ignore unic tables local oldwidth = getfield(start,"width") local newwidth = getdigitwidth(font) if newwidth ~= oldwidth then if trace_digits then report_digits("digit trigger %a, instance %a, char %C, unicode %U, delta %s", - attr%100,div(attr,100),char,what,newwidth-oldwidth) + attr%100,div(attr,100),char,unic,newwidth-oldwidth) end head, start = nodes.aligned(head,start,start,newwidth,"middle") return head, start, true diff --git a/tex/context/base/typo-tal.lua b/tex/context/base/typo-tal.lua index 1e9c815b3..d41a63dd5 100644 --- a/tex/context/base/typo-tal.lua +++ b/tex/context/base/typo-tal.lua @@ -12,6 +12,8 @@ if not modules then modules = { } end modules ['typo-tal'] = { -- Currently we have two methods: text and number with some downward compatible -- defaulting. +-- We can speed up by saving the current fontcharacters[font] + lastfont. 
+ local next, type = next, type local div = math.div local utfbyte = utf.byte @@ -23,7 +25,7 @@ local glyph_code = nodecodes.glyph local glue_code = nodecodes.glue local fontcharacters = fonts.hashes.characters -local unicodes = fonts.hashes.unicodes +----- unicodes = fonts.hashes.unicodes local categories = characters.categories -- nd local variables = interfaces.variables @@ -135,7 +137,8 @@ function characteralign.handler(originalhead,where) if id == glyph_code then local char = getchar(current) local font = getfont(current) - local unicode = unicodes[font][char] + -- local unicode = unicodes[font][char] + local unicode = fontcharacters[font][char].unicode or char -- ignore tables if not unicode then -- no unicode so forget about it elseif unicode == separator then @@ -213,7 +216,8 @@ function characteralign.handler(originalhead,where) if id == glyph_code then local char = getchar(current) local font = getfont(current) - local unicode = unicodes[font][char] + -- local unicode = unicodes[font][char] + local unicode = fontcharacters[font][char].unicode or char -- ignore tables if not unicode then -- no unicode so forget about it elseif unicode == separator then diff --git a/tex/context/base/util-sta.lua b/tex/context/base/util-sta.lua index 1a61ec4e6..27ab5a624 100644 --- a/tex/context/base/util-sta.lua +++ b/tex/context/base/util-sta.lua @@ -81,6 +81,8 @@ end function stacker.new(name) + local report = logs.reporter("stacker",name or nil) + local s local stack = { } @@ -126,8 +128,18 @@ function stacker.new(name) end end - local tops = { } - local top, switch + local tops = { } + local top = nil + local switch = nil + + local function resolve_reset(mode) + if #tops > 0 then + report("resetting %s left-over states of %a",#tops,name) + end + tops = { } + top = nil + switch = nil + end local function resolve_begin(mode) if mode then @@ -206,8 +218,7 @@ function stacker.new(name) local function resolve_end() -- resolve_step(s.unset) - local noftop = #top - if noftop 
> 0 then + if #tops > 0 then -- was #top brrr local result = s.stop(s,top,1,#top) remove(tops) top = tops[#tops] @@ -224,8 +235,6 @@ function stacker.new(name) resolve_end() end - local report = logs.reporter("stacker",name or nil) - s = { name = name or "unknown", unset = -1, @@ -240,6 +249,7 @@ function stacker.new(name) resolve_begin = resolve_begin, resolve_step = resolve_step, resolve_end = resolve_end, + resolve_reset = resolve_reset, } return s -- we can overload functions diff --git a/tex/context/base/util-tpl.lua b/tex/context/base/util-tpl.lua index 67d058221..bd0e261a9 100644 --- a/tex/context/base/util-tpl.lua +++ b/tex/context/base/util-tpl.lua @@ -52,7 +52,7 @@ local sqlescape = lpeg.replacer { -- { "\t", "\\t" }, } -local sqlquoted = lpeg.Cs(lpeg.Cc("'") * sqlescape * lpeg.Cc("'")) +local sqlquoted = Cs(Cc("'") * sqlescape * Cc("'")) lpegpatterns.sqlescape = sqlescape lpegpatterns.sqlquoted = sqlquoted @@ -111,13 +111,21 @@ local luaescaper = escapers.lua local quotedluaescaper = quotedescapers.lua local function replacekeyunquoted(s,t,how,recurse) -- ".. \" " - local escaper = how and escapers[how] or luaescaper - return escaper(replacekey(s,t,how,recurse)) + if how == false then + return replacekey(s,t,how,recurse) + else + local escaper = how and escapers[how] or luaescaper + return escaper(replacekey(s,t,how,recurse)) + end end local function replacekeyquoted(s,t,how,recurse) -- ".. 
\" " - local escaper = how and quotedescapers[how] or quotedluaescaper - return escaper(replacekey(s,t,how,recurse)) + if how == false then + return replacekey(s,t,how,recurse) + else + local escaper = how and quotedescapers[how] or quotedluaescaper + return escaper(replacekey(s,t,how,recurse)) + end end local single = P("%") -- test %test% test : resolves test @@ -188,3 +196,5 @@ end -- inspect(utilities.templates.replace("test %one% test", { one = "%two%", two = "two" })) -- inspect(utilities.templates.resolve({ one = "%two%", two = "two", three = "%three%" })) +-- inspect(utilities.templates.replace("test %one% test", { one = "%two%", two = "two" },false,true)) +-- inspect(utilities.templates.replace("test %one% test", { one = "%two%", two = "two" },false)) diff --git a/tex/context/base/x-asciimath.lua b/tex/context/base/x-asciimath.lua index 60fbb0b5a..ee4fb5134 100644 --- a/tex/context/base/x-asciimath.lua +++ b/tex/context/base/x-asciimath.lua @@ -108,10 +108,12 @@ local reserved = { ["sqrt"] = "\\rootradical{}", ["root"] = "\\rootradical", ["frac"] = "\\frac", - ["stackrel"] = "\\stackrel", - -- ["text"] = "\\mathoptext", + -- ["stackrel"] = "\\stackrel", + ["stackrel"] = "\\asciimathstackrel", + -- ["text"] = "\\asciimathoptext", -- ["bb"] = "\\bb", ["hat"] = "\\widehat", + ["bar"] = "\\overbar", ["overbar"] = "\\overbar", ["underline"] = "\\underline", ["vec"] = "\\overrightarrow", @@ -660,10 +662,11 @@ local reserved = { } local isbinary = { - ["\\frac"] = true, - ["\\root"] = true, - ["\\rootradical"] = true, - ["\\stackrel"] = true, + ["\\frac"] = true, + ["\\root"] = true, + ["\\rootradical"] = true, + ["\\stackrel"] = true, + ["\\asciimathstackrel"] = true, } local isunary = { @@ -672,8 +675,10 @@ local isunary = { -- ["\\bb"] = true, ["\\text"] = true, -- mathoptext ["\\mathoptext"] = true, -- mathoptext + ["\\asciimathoptext"]= true, -- mathoptext ["\\hat"] = true, -- widehat ["\\widehat"] = true, -- widehat + ["\\bar"] = true, -- ["\\overbar"] = 
true, -- ["\\underline"] = true, -- ["\\vec"] = true, -- overrightarrow @@ -785,12 +790,12 @@ local p_reserved = local p_text = P("text") * p_spaces^0 - * Cc("\\mathoptext") + * Cc("\\asciimathoptext") * ( -- maybe balanced Cs( P("{") * (1-P("}"))^0 * P("}") ) + Cs((P("(")/"{") * (1-P(")"))^0 * (P(")")/"}")) ) - + Cc("\\mathoptext") * Cs(Cc("{") * patterns.undouble * Cc("}")) + + Cc("\\asciimathoptext") * Cs(Cc("{") * patterns.undouble * Cc("}")) -- either map to \left or map to \left\name diff --git a/tex/context/base/x-asciimath.mkiv b/tex/context/base/x-asciimath.mkiv index acdcae64a..6a9595a59 100644 --- a/tex/context/base/x-asciimath.mkiv +++ b/tex/context/base/x-asciimath.mkiv @@ -119,11 +119,25 @@ %D %D In retrospect I sometimes wonder if the energy put into constantly adapting to %D the fashion of the day pays off. Probably not. It definitely doesn't pay of. +%D +%D More complex crap: +%D +%D 1: $x + \stackrel{comment}{\stackrel{\utfchar{"23DE}}{yyyyyyyy}} = y$ \blank +%D 2: \asciimath{x + stackrel{\utfchar{"23DE}}{yyyyyyyy} = y} \blank +%D 3: \asciimath{x + stackrel{yyyyyyyy}{\utfchar{"23DE}} = y} \blank +%D 4: \asciimath{x + stackrel{"comment"}{stackrel{\utfchar{"23DE}}{yyyyyyyy}} = y} \blank \unprotect \writestatus{asciimath}{beware, this is an experimental (m4all only) module} +%D Hacks: + +\unexpanded\def\asciimathoptext #1{\ifmmode\mathop{\text{#1}}\else#1\fi} +\unexpanded\def\asciimathoptexttraced#1{\ifmmode\mathop{\text{\color[darkgreen]{#1}}}\else\color[darkgreen]{#1}\fi} + +\unexpanded\def\asciimathstackrel #1#2{\mathematics{\mathop{\let\limits\relax\mover{#2}{#1}}}} + %D The core commands: \unexpanded\def\asciimath#1% @@ -168,8 +182,7 @@ \unexpanded\def\ShowAsciiMathStart {\begingroup - \let\normalmathoptext\mathoptext - \unexpanded\def\mathoptext##1{\normalmathoptext{\color[darkgreen]{##1}}}% + \let\asciimathoptext\asciimathoptexttraced \setuptyping[\v!buffer][\c!before=,\c!after=] \setupmargindata[\v!left][\c!style=]} diff --git 
a/tex/context/base/x-mathml.xsd b/tex/context/base/x-mathml.xsd index 17f0bea2a..1c29452b0 100644 --- a/tex/context/base/x-mathml.xsd +++ b/tex/context/base/x-mathml.xsd @@ -3,9 +3,9 @@ - + - - + + diff --git a/tex/context/fonts/treatments.lfg b/tex/context/fonts/treatments.lfg index 07bb51def..40bac427c 100644 --- a/tex/context/fonts/treatments.lfg +++ b/tex/context/fonts/treatments.lfg @@ -25,50 +25,50 @@ local fix_unifraktur = { end, } -local fix_lmmonoregular = { - -- - -- there are now some extra safeguards for idris - -- - comment = "wrong widths of some glyphs", - fixes = function(data) - report("fixing some wrong widths") - local unicodes = data.resources.unicodes - local descriptions = data.descriptions - local function getdescription(name) - local unicode = unicodes[name] - if not unicode then - report("no valid unicode for %a",name) - return - end - local description = descriptions[unicode] - if not description then - report("no glyph names %a in font",name) - return - end - return description - end - local zero = getdescription("zero") - if not zero then - return - end - local defaultwidth = zero.width - local function setwidth(name) - local data = getdescription(name) - if data then - data.width = defaultwidth - end - end - setwidth("six") - setwidth("nine") - setwidth("caron") - setwidth("perthousand") - setwidth("numero") - setwidth("caron.cap") - setwidth("six.taboldstyle") - setwidth("nine.taboldstyle") - setwidth("dollar.oldstyle") - end -} +-- local fix_lmmonoregular = { +-- -- +-- -- there are now some extra safeguards for idris +-- -- +-- comment = "wrong widths of some glyphs", +-- fixes = function(data) +-- report("fixing some wrong widths") +-- local unicodes = data.resources.unicodes +-- local descriptions = data.descriptions +-- local function getdescription(name) +-- local unicode = unicodes[name] +-- if not unicode then +-- report("no valid unicode for %a",name) +-- return +-- end +-- local description = descriptions[unicode] +-- if not 
description then +-- report("no glyph names %a in font",name) +-- return +-- end +-- return description +-- end +-- local zero = getdescription("zero") +-- if not zero then +-- return +-- end +-- local defaultwidth = zero.width +-- local function setwidth(name) +-- local data = getdescription(name) +-- if data then +-- data.width = defaultwidth +-- end +-- end +-- setwidth("six") +-- setwidth("nine") +-- setwidth("caron") +-- setwidth("perthousand") +-- setwidth("numero") +-- setwidth("caron.cap") +-- setwidth("six.taboldstyle") +-- setwidth("nine.taboldstyle") +-- setwidth("dollar.oldstyle") +-- end +-- } return { name = "treatments", diff --git a/tex/context/interface/keys-cs.xml b/tex/context/interface/keys-cs.xml index e32918566..b261a74e4 100644 --- a/tex/context/interface/keys-cs.xml +++ b/tex/context/interface/keys-cs.xml @@ -659,7 +659,7 @@ - + diff --git a/tex/context/interface/keys-de.xml b/tex/context/interface/keys-de.xml index 48329fabf..b9f6c400b 100644 --- a/tex/context/interface/keys-de.xml +++ b/tex/context/interface/keys-de.xml @@ -659,7 +659,7 @@ - + diff --git a/tex/context/interface/keys-en.xml b/tex/context/interface/keys-en.xml index dd8de7312..50e15c05b 100644 --- a/tex/context/interface/keys-en.xml +++ b/tex/context/interface/keys-en.xml @@ -659,7 +659,7 @@ - + diff --git a/tex/context/interface/keys-fr.xml b/tex/context/interface/keys-fr.xml index 4a9f2b78b..15ebc93c6 100644 --- a/tex/context/interface/keys-fr.xml +++ b/tex/context/interface/keys-fr.xml @@ -659,7 +659,7 @@ - + diff --git a/tex/context/interface/keys-it.xml b/tex/context/interface/keys-it.xml index ce1be9a61..e256c965b 100644 --- a/tex/context/interface/keys-it.xml +++ b/tex/context/interface/keys-it.xml @@ -659,7 +659,7 @@ - + diff --git a/tex/context/interface/keys-nl.xml b/tex/context/interface/keys-nl.xml index bdcf2a96a..eebc048c0 100644 --- a/tex/context/interface/keys-nl.xml +++ b/tex/context/interface/keys-nl.xml @@ -659,7 +659,7 @@ - + diff --git 
a/tex/context/interface/keys-pe.xml b/tex/context/interface/keys-pe.xml index 77b137293..926cb4f1e 100644 --- a/tex/context/interface/keys-pe.xml +++ b/tex/context/interface/keys-pe.xml @@ -659,7 +659,7 @@ - + diff --git a/tex/context/interface/keys-ro.xml b/tex/context/interface/keys-ro.xml index ad07ea880..f07634521 100644 --- a/tex/context/interface/keys-ro.xml +++ b/tex/context/interface/keys-ro.xml @@ -659,7 +659,7 @@ - + diff --git a/tex/generic/context/luatex/luatex-basics-gen.lua b/tex/generic/context/luatex/luatex-basics-gen.lua index c19a49af3..e7cdc7b39 100644 --- a/tex/generic/context/luatex/luatex-basics-gen.lua +++ b/tex/generic/context/luatex/luatex-basics-gen.lua @@ -351,7 +351,12 @@ end -- function table.setmetatableindex(t,f) + if type(t) ~= "table" then + f = f or t + t = { } + end setmetatable(t,{ __index = f }) + return t end -- helper for plain: diff --git a/tex/generic/context/luatex/luatex-fonts-merged.lua b/tex/generic/context/luatex/luatex-fonts-merged.lua index 98e98d806..45b6f024a 100644 --- a/tex/generic/context/luatex/luatex-fonts-merged.lua +++ b/tex/generic/context/luatex/luatex-fonts-merged.lua @@ -1,6 +1,6 @@ -- merged file : luatex-fonts-merged.lua -- parent file : luatex-fonts.lua --- merge date : 09/27/14 14:46:07 +-- merge date : 10/07/14 11:14:05 do -- begin closure to overcome local limits and interference @@ -149,6 +149,8 @@ patterns.utfbom_16_le=utfbom_16_le patterns.utfbom_8=utfbom_8 patterns.utf_16_be_nl=P("\000\r\000\n")+P("\000\r")+P("\000\n") patterns.utf_16_le_nl=P("\r\000\n\000")+P("\r\000")+P("\n\000") +patterns.utf_32_be_nl=P("\000\000\000\r\000\000\000\n")+P("\000\000\000\r")+P("\000\000\000\n") +patterns.utf_32_le_nl=P("\r\000\000\000\n\000\000\000")+P("\r\000\000\000")+P("\n\000\000\000") patterns.utf8one=R("\000\127") patterns.utf8two=R("\194\223")*utf8next patterns.utf8three=R("\224\239")*utf8next*utf8next @@ -731,6 +733,65 @@ local case_2=period*(digit-trailingzeros)^1*(trailingzeros/"") local 
number=digit^1*(case_1+case_2) local stripper=Cs((number+1)^0) lpeg.patterns.stripzeros=stripper +local byte_to_HEX={} +local byte_to_hex={} +local byte_to_dec={} +local hex_to_byte={} +for i=0,255 do + local H=format("%02X",i) + local h=format("%02x",i) + local d=format("%03i",i) + local c=char(i) + byte_to_HEX[c]=H + byte_to_hex[c]=h + byte_to_dec[c]=d + hex_to_byte[h]=c + hex_to_byte[H]=c +end +local hextobyte=P(2)/hex_to_byte +local bytetoHEX=P(1)/byte_to_HEX +local bytetohex=P(1)/byte_to_hex +local bytetodec=P(1)/byte_to_dec +local hextobytes=Cs(hextobyte^0) +local bytestoHEX=Cs(bytetoHEX^0) +local bytestohex=Cs(bytetohex^0) +local bytestodec=Cs(bytetodec^0) +patterns.hextobyte=hextobyte +patterns.bytetoHEX=bytetoHEX +patterns.bytetohex=bytetohex +patterns.bytetodec=bytetodec +patterns.hextobytes=hextobytes +patterns.bytestoHEX=bytestoHEX +patterns.bytestohex=bytestohex +patterns.bytestodec=bytestodec +function string.toHEX(s) + if not s or s=="" then + return s + else + return lpegmatch(bytestoHEX,s) + end +end +function string.tohex(s) + if not s or s=="" then + return s + else + return lpegmatch(bytestohex,s) + end +end +function string.todec(s) + if not s or s=="" then + return s + else + return lpegmatch(bytestodec,s) + end +end +function string.tobytes(s) + if not s or s=="" then + return s + else + return lpegmatch(hextobytes,s) + end +end end -- closure @@ -895,7 +956,7 @@ local function compare(a,b) if ta==tb then return a=private or (unic>=0xE000 and unic<=0xF8FF) or unic==0xFFFE or unic==0xFFFF then + local r=overloads[name] + if r then + glyph.unicode=r.unicode + elseif unic==-1 or unic>=private or (unic>=0xE000 and unic<=0xF8FF) or unic==0xFFFE or unic==0xFFFF then local unicode=lumunic and lumunic[name] or unicodevector[name] if unicode then - originals[index]=unicode - tounicode[index]=tounicode16(unicode,name) + glyph.unicode=unicode ns=ns+1 end if (not unicode) and usedmap then @@ -5283,8 +5392,7 @@ function 
mappings.addtounicode(data,filename) if foundindex then unicode=cidcodes[foundindex] if unicode then - originals[index]=unicode - tounicode[index]=tounicode16(unicode,name) + glyph.unicode=unicode ns=ns+1 else local reference=cidnames[foundindex] @@ -5293,21 +5401,18 @@ function mappings.addtounicode(data,filename) if foundindex then unicode=cidcodes[foundindex] if unicode then - originals[index]=unicode - tounicode[index]=tounicode16(unicode,name) + glyph.unicode=unicode ns=ns+1 end end if not unicode or unicode=="" then local foundcodes,multiple=lpegmatch(uparser,reference) if foundcodes then - originals[index]=foundcodes + glyph.unicode=foundcodes if multiple then - tounicode[index]=tounicode16sequence(foundcodes) nl=nl+1 unicode=true else - tounicode[index]=tounicode16(foundcodes,name) ns=ns+1 unicode=foundcodes end @@ -5345,30 +5450,30 @@ function mappings.addtounicode(data,filename) end if n==0 then elseif n==1 then - originals[index]=t[1] - tounicode[index]=tounicode16(t[1],name) + glyph.unicode=t[1] else - originals[index]=t - tounicode[index]=tounicode16sequence(t) + glyph.unicode=t end nl=nl+1 end if not unicode or unicode=="" then local foundcodes,multiple=lpegmatch(uparser,name) if foundcodes then + glyph.unicode=foundcodes if multiple then - originals[index]=foundcodes - tounicode[index]=tounicode16sequence(foundcodes,name) nl=nl+1 unicode=true else - originals[index]=foundcodes - tounicode[index]=tounicode16(foundcodes,name) ns=ns+1 unicode=foundcodes end end end + local r=overloads[unicode] + if r then + unicode=r.unicode + glyph.unicode=unicode + end if not unicode then missing[name]=true end @@ -5387,8 +5492,7 @@ function mappings.addtounicode(data,filename) else return end - local index=descriptions[code].index - if tounicode[index] then + if descriptions[code].unicode then return end local g=guess[variant] @@ -5453,37 +5557,31 @@ function mappings.addtounicode(data,filename) end end end + local orphans=0 + local guessed=0 for k,v in next,guess do 
if type(v)=="number" then - guess[k]=tounicode16(v) + descriptions[unicodes[k]].unicode=descriptions[v].unicode or v + guessed=guessed+1 else local t=nil local l=lower(k) local u=unicodes[l] if not u then + orphans=orphans+1 elseif u==-1 or u>=private or (u>=0xE000 and u<=0xF8FF) or u==0xFFFE or u==0xFFFF then - t=tounicode[descriptions[u].index] - else - end - if t then - guess[k]=t + local unicode=descriptions[u].unicode + if unicode then + descriptions[unicodes[k]].unicode=unicode + guessed=guessed+1 + else + orphans=orphans+1 + end else - guess[k]="FFFD" + orphans=orphans+1 end end end - local orphans=0 - local guessed=0 - for k,v in next,guess do - tounicode[descriptions[unicodes[k]].index]=v - if v=="FFFD" then - orphans=orphans+1 - guess[k]=false - else - guessed=guessed+1 - guess[k]=true - end - end if trace_loading and orphans>0 or guessed>0 then report_fonts("%s glyphs with no related unicode, %s guessed, %s orphans",guessed+orphans,guessed,orphans) end @@ -5492,9 +5590,17 @@ function mappings.addtounicode(data,filename) for unic,glyph in table.sortedhash(descriptions) do local name=glyph.name local index=glyph.index - local toun=tounicode[index] - if toun then - report_fonts("internal slot %U, name %a, unicode %U, tounicode %a",index,name,unic,toun) + local unicode=glyph.unicode + if unicode then + if type(unicode)=="table" then + local unicodes={} + for i=1,#unicode do + unicodes[i]=formatters("%U",unicode[i]) + end + report_fonts("internal slot %U, name %a, unicode %U, tounicode % t",index,name,unic,unicodes) + else + report_fonts("internal slot %U, name %a, unicode %U, tounicode %U",index,name,unic,unicode) + end else report_fonts("internal slot %U, name %a, unicode %U",index,name,unic) end @@ -5675,6 +5781,10 @@ local function read_from_tfm(specification) features.encoding=encoding end end + properties.haskerns=true + properties.haslogatures=true + resources.unicodes={} + resources.lookuptags={} return tfmdata end end @@ -5730,6 +5840,7 @@ local 
trace_indexing=false trackers.register("afm.indexing",function(v) trace_in local trace_loading=false trackers.register("afm.loading",function(v) trace_loading=v end) local trace_defining=false trackers.register("fonts.defining",function(v) trace_defining=v end) local report_afm=logs.reporter("fonts","afm loading") +local setmetatableindex=table.setmetatableindex local findbinfile=resolvers.findbinfile local definers=fonts.definers local readers=fonts.readers @@ -5738,7 +5849,7 @@ local afm=constructors.newhandler("afm") local pfb=constructors.newhandler("pfb") local afmfeatures=constructors.newfeatures("afm") local registerafmfeature=afmfeatures.register -afm.version=1.410 +afm.version=1.500 afm.cache=containers.define("fonts","afm",afm.version,true) afm.autoprefixed=true afm.helpdata={} @@ -5746,6 +5857,7 @@ afm.syncspace=true afm.addligatures=true afm.addtexligatures=true afm.addkerns=true +local overloads=fonts.mappings.overloads local applyruntimefixes=fonts.treatments and fonts.treatments.applyfixes local function setmode(tfmdata,value) if value then @@ -5933,7 +6045,7 @@ local function readafm(filename) return nil end end -local addkerns,addligatures,addtexligatures,unify,normalize +local addkerns,addligatures,addtexligatures,unify,normalize,fixnames function afm.load(filename) filename=resolvers.findfile(filename,'afm') or "" if filename~="" and not fonts.names.ignoredfile(filename) then @@ -5976,6 +6088,7 @@ function afm.load(filename) addkerns(data) end normalize(data) + fixnames(data) report_afm("add tounicode data") fonts.mappings.addtounicode(data,filename) data.size=size @@ -5983,6 +6096,7 @@ function afm.load(filename) data.pfbsize=pfbsize data.pfbtime=pfbtime report_afm("saving %a in cache",name) + data.resources.unicodes=nil data=containers.write(afm.cache,name,data) data=containers.read(afm.cache,name) end @@ -6042,18 +6156,30 @@ unify=function(data,filename) local filename=resources.filename or file.removesuffix(file.basename(filename)) 
resources.filename=resolvers.unresolve(filename) resources.unicodes=unicodes - resources.marks={} - resources.names=names + resources.marks={} resources.private=private end normalize=function(data) end +fixnames=function(data) + for k,v in next,data.descriptions do + local n=v.name + local r=overloads[n] + if r then + local name=r.name + if trace_indexing then + report_afm("renaming characters %a to %a",n,name) + end + v.name=name + v.unicode=r.unicode + end + end +end local addthem=function(rawdata,ligatures) if ligatures then local descriptions=rawdata.descriptions local resources=rawdata.resources local unicodes=resources.unicodes - local names=resources.names for ligname,ligdata in next,ligatures do local one=descriptions[unicodes[ligname]] if one then @@ -6186,8 +6312,8 @@ local function copytotfm(data) local filename=constructors.checkedfilename(resources) local fontname=metadata.fontname or metadata.fullname local fullname=metadata.fullname or metadata.fontname - local endash=unicodes['space'] - local emdash=unicodes['emdash'] + local endash=0x0020 + local emdash=0x2014 local spacer="space" local spaceunits=500 local monospaced=metadata.isfixedpitch @@ -6241,7 +6367,7 @@ local function copytotfm(data) if charxheight then parameters.x_height=charxheight else - local x=unicodes['x'] + local x=0x0078 if x then local x=descriptions[x] if x then @@ -6288,7 +6414,34 @@ function afm.setfeatures(tfmdata,features) return {} end end -local function checkfeatures(specification) +local function addtables(data) + local resources=data.resources + local lookuptags=resources.lookuptags + local unicodes=resources.unicodes + if not lookuptags then + lookuptags={} + resources.lookuptags=lookuptags + end + setmetatableindex(lookuptags,function(t,k) + local v=type(k)=="number" and ("lookup "..k) or k + t[k]=v + return v + end) + if not unicodes then + unicodes={} + resources.unicodes=unicodes + setmetatableindex(unicodes,function(t,k) + setmetatableindex(unicodes,nil) + for u,d 
in next,data.descriptions do + local n=d.name + if n then + t[n]=u + end + end + return rawget(t,k) + end) + end + constructors.addcoreunicodes(unicodes) end local function afmtotfm(specification) local afmname=specification.filename or specification.name @@ -6315,6 +6468,7 @@ local function afmtotfm(specification) if not tfmdata then local rawdata=afm.load(afmname) if rawdata and next(rawdata) then + addtables(rawdata) adddimensions(rawdata) tfmdata=copytotfm(rawdata) if tfmdata and next(tfmdata) then @@ -6349,6 +6503,7 @@ end local function prepareligatures(tfmdata,ligatures,value) if value then local descriptions=tfmdata.descriptions + local hasligatures=false for unicode,character in next,tfmdata.characters do local description=descriptions[unicode] local dligatures=description.ligatures @@ -6364,8 +6519,10 @@ local function prepareligatures(tfmdata,ligatures,value) type=0 } end + hasligatures=true end end + tfmdata.properties.hasligatures=hasligatures end end local function preparekerns(tfmdata,kerns,value) @@ -6374,6 +6531,7 @@ local function preparekerns(tfmdata,kerns,value) local resources=rawdata.resources local unicodes=resources.unicodes local descriptions=tfmdata.descriptions + local haskerns=false for u,chr in next,tfmdata.characters do local d=descriptions[u] local newkerns=d[kerns] @@ -6389,8 +6547,10 @@ local function preparekerns(tfmdata,kerns,value) kerns[uk]=v end end + haskerns=true end end + tfmdata.properties.haskerns=haskerns end end local list={ @@ -6820,6 +6980,8 @@ local reversed,concat,remove,sortedkeys=table.reversed,table.concat,table.remove local ioflush=io.flush local fastcopy,tohash,derivetable=table.fastcopy,table.tohash,table.derive local formatters=string.formatters +local P,R,S,C,Ct,lpegmatch=lpeg.P,lpeg.R,lpeg.S,lpeg.C,lpeg.Ct,lpeg.match +local setmetatableindex=table.setmetatableindex local allocate=utilities.storage.allocate local registertracker=trackers.register local registerdirective=directives.register @@ -6834,26 
+6996,27 @@ local trace_dynamics=false registertracker("otf.dynamics",function(v) trace_dyna local trace_sequences=false registertracker("otf.sequences",function(v) trace_sequences=v end) local trace_markwidth=false registertracker("otf.markwidth",function(v) trace_markwidth=v end) local trace_defining=false registertracker("fonts.defining",function(v) trace_defining=v end) +local compact_lookups=true registertracker("otf.compactlookups",function(v) compact_lookups=v end) +local purge_names=true registertracker("otf.purgenames",function(v) purge_names=v end) local report_otf=logs.reporter("fonts","otf loading") local fonts=fonts local otf=fonts.handlers.otf otf.glists={ "gsub","gpos" } -otf.version=2.762 +otf.version=2.802 otf.cache=containers.define("fonts","otf",otf.version,true) local fontdata=fonts.hashes.identifiers local chardata=characters and characters.data -local otffeatures=fonts.constructors.newfeatures("otf") +local definers=fonts.definers +local readers=fonts.readers +local constructors=fonts.constructors +local otffeatures=constructors.newfeatures("otf") local registerotffeature=otffeatures.register local enhancers=allocate() otf.enhancers=enhancers local patches={} enhancers.patches=patches -local definers=fonts.definers -local readers=fonts.readers -local constructors=fonts.constructors local forceload=false local cleanup=0 -local usemetatables=false local packdata=true local syncspace=true local forcenotdef=false @@ -6872,7 +7035,6 @@ formats.ttc="truetype" formats.dfont="truetype" registerdirective("fonts.otf.loader.cleanup",function(v) cleanup=tonumber(v) or (v and 1) or 0 end) registerdirective("fonts.otf.loader.force",function(v) forceload=v end) -registerdirective("fonts.otf.loader.usemetatables",function(v) usemetatables=v end) registerdirective("fonts.otf.loader.pack",function(v) packdata=v end) registerdirective("fonts.otf.loader.syncspace",function(v) syncspace=v end) registerdirective("fonts.otf.loader.forcenotdef",function(v) 
forcenotdef=v end) @@ -7017,6 +7179,8 @@ local ordered_enhancers={ "check encoding", "add duplicates", "cleanup tables", + "compact lookups", + "purge names", } local actions=allocate() local before=allocate() @@ -7207,7 +7371,7 @@ function otf.load(filename,sub,featurefile) goodies={}, helpers={ tounicodelist=splitter, - tounicodetable=lpeg.Ct(splitter), + tounicodetable=Ct(splitter), }, } starttiming(data) @@ -7250,6 +7414,34 @@ function otf.load(filename,sub,featurefile) report_otf("loading from cache using hash %a",hash) end enhance("unpack",data,filename,nil,false) + local resources=data.resources + local lookuptags=resources.lookuptags + local unicodes=resources.unicodes + if not lookuptags then + lookuptags={} + resources.lookuptags=lookuptags + end + setmetatableindex(lookuptags,function(t,k) + local v=type(k)=="number" and ("lookup "..k) or k + t[k]=v + return v + end) + if not unicodes then + unicodes={} + resources.unicodes=unicodes + setmetatableindex(unicodes,function(t,k) + setmetatableindex(unicodes,nil) + for u,d in next,data.descriptions do + local n=d.name + if n then + t[n]=u + else + end + end + return rawget(t,k) + end) + end + constructors.addcoreunicodes(unicodes) if applyruntimefixes then applyruntimefixes(filename,data) end @@ -7286,34 +7478,22 @@ actions["add dimensions"]=function(data,filename) local defaultheight=resources.defaultheight or 0 local defaultdepth=resources.defaultdepth or 0 local basename=trace_markwidth and file.basename(filename) - if usemetatables then - for _,d in next,descriptions do - local wd=d.width - if not wd then - d.width=defaultwidth - elseif trace_markwidth and wd~=0 and d.class=="mark" then - report_otf("mark %a with width %b found in %a",d.name or "",wd,basename) - end - setmetatable(d,mt) + for _,d in next,descriptions do + local bb,wd=d.boundingbox,d.width + if not wd then + d.width=defaultwidth + elseif trace_markwidth and wd~=0 and d.class=="mark" then + report_otf("mark %a with width %b found in 
%a",d.name or "",wd,basename) end - else - for _,d in next,descriptions do - local bb,wd=d.boundingbox,d.width - if not wd then - d.width=defaultwidth - elseif trace_markwidth and wd~=0 and d.class=="mark" then - report_otf("mark %a with width %b found in %a",d.name or "",wd,basename) - end - if bb then - local ht,dp=bb[4],-bb[2] - if ht==0 or ht<0 then - else - d.height=ht - end - if dp==0 or dp<0 then - else - d.depth=dp - end + if bb then + local ht,dp=bb[4],-bb[2] + if ht==0 or ht<0 then + else + d.height=ht + end + if dp==0 or dp<0 then + else + d.depth=dp end end end @@ -7878,9 +8058,14 @@ local function t_hashed(t,cache) local ti=t[i] local tih=cache[ti] if not tih then - tih={} - for i=1,#ti do - tih[ti[i]]=true + local tn=#ti + if tn==1 then + tih={ [ti[1]]=true } + else + tih={} + for i=1,tn do + tih[ti[i]]=true + end end cache[ti]=tih end @@ -7893,12 +8078,17 @@ local function t_hashed(t,cache) end local function s_hashed(t,cache) if t then - local ht={} local tf=t[1] - for i=1,#tf do - ht[i]={ [tf[i]]=true } + local nf=#tf + if nf==1 then + return { [tf[1]]=true } + else + local ht={} + for i=1,nf do + ht[i]={ [tf[i]]=true } + end + return ht end - return ht else return nil end @@ -8326,7 +8516,7 @@ actions["check glyphs"]=function(data,filename,raw) description.glyph=nil end end -local valid=(lpeg.R("\x00\x7E")-lpeg.S("(){}[]<>%/ \n\r\f\v"))^0*lpeg.P(-1) +local valid=(R("\x00\x7E")-S("(){}[]<>%/ \n\r\f\v"))^0*P(-1) local function valid_ps_name(str) return str and str~="" and #str<64 and lpegmatch(valid,str) and true or false end @@ -8380,8 +8570,17 @@ actions["check metadata"]=function(data,filename,raw) end end actions["cleanup tables"]=function(data,filename,raw) + local duplicates=data.resources.duplicates + if duplicates then + for k,v in next,duplicates do + if #v==1 then + duplicates[k]=v[1] + end + end + end data.resources.indices=nil - data.helpers=nil + data.resources.unicodes=nil + data.helpers=nil end actions["reorganize glyph 
lookups"]=function(data,filename,raw) local resources=data.resources @@ -8486,6 +8685,142 @@ actions["reorganize glyph anchors"]=function(data,filename,raw) end end end +local bogusname=(P("uni")+P("u"))*R("AF","09")^4+(P("index")+P("glyph")+S("Ii")*P("dentity")*P(".")^0)*R("09")^1 +local uselessname=(1-bogusname)^0*bogusname +actions["purge names"]=function(data,filename,raw) + if purge_names then + local n=0 + for u,d in next,data.descriptions do + if lpegmatch(uselessname,d.name) then + n=n+1 + d.name=nil + end + end + if n>0 then + report_otf("%s bogus names removed",n) + end + end +end +actions["compact lookups"]=function(data,filename,raw) + if not compact_lookups then + report_otf("not compacting") + return + end + local last=0 + local tags=table.setmetatableindex({}, + function(t,k) + last=last+1 + t[k]=last + return last + end + ) + local descriptions=data.descriptions + local resources=data.resources + for u,d in next,descriptions do + local slookups=d.slookups + if type(slookups)=="table" then + local s={} + for k,v in next,slookups do + s[tags[k]]=v + end + d.slookups=s + end + local mlookups=d.mlookups + if type(mlookups)=="table" then + local m={} + for k,v in next,mlookups do + m[tags[k]]=v + end + d.mlookups=m + end + local kerns=d.kerns + if type(kerns)=="table" then + local t={} + for k,v in next,kerns do + t[tags[k]]=v + end + d.kerns=t + end + end + local lookups=data.lookups + if lookups then + local l={} + for k,v in next,lookups do + local rules=v.rules + if rules then + for i=1,#rules do + local l=rules[i].lookups + if type(l)=="table" then + for i=1,#l do + l[i]=tags[l[i]] + end + end + end + end + l[tags[k]]=v + end + data.lookups=l + end + local lookups=resources.lookups + if lookups then + local l={} + for k,v in next,lookups do + local s=v.subtables + if type(s)=="table" then + for i=1,#s do + s[i]=tags[s[i]] + end + end + l[tags[k]]=v + end + resources.lookups=l + end + local sequences=resources.sequences + if sequences then + for 
i=1,#sequences do + local s=sequences[i] + local n=s.name + if n then + s.name=tags[n] + end + local t=s.subtables + if type(t)=="table" then + for i=1,#t do + t[i]=tags[t[i]] + end + end + end + end + local lookuptypes=resources.lookuptypes + if lookuptypes then + local l={} + for k,v in next,lookuptypes do + l[tags[k]]=v + end + resources.lookuptypes=l + end + local anchor_to_lookup=resources.anchor_to_lookup + if anchor_to_lookup then + for anchor,lookups in next,anchor_to_lookup do + local l={} + for lookup,value in next,lookups do + l[tags[lookup]]=value + end + anchor_to_lookup[anchor]=l + end + end + local lookup_to_anchor=resources.lookup_to_anchor + if lookup_to_anchor then + local l={} + for lookup,value in next,lookup_to_anchor do + l[tags[lookup]]=value + end + resources.lookup_to_anchor=l + end + tags=table.swapped(tags) + report_otf("%s lookup tags compacted",#tags) + resources.lookuptags=tags +end function otf.setfeatures(tfmdata,features) local okay=constructors.initializefeatures("otf",tfmdata,features,trace_features,report_otf) if okay then @@ -8587,8 +8922,8 @@ local function copytotfm(data,cache_id) parameters.italicangle=italicangle parameters.charwidth=charwidth parameters.charxheight=charxheight - local space=0x0020 - local emdash=0x2014 + local space=0x0020 + local emdash=0x2014 if monospaced then if descriptions[space] then spaceunits,spacer=descriptions[space].width,"space" @@ -8635,7 +8970,7 @@ local function copytotfm(data,cache_id) if charxheight then parameters.x_height=charxheight else - local x=0x78 + local x=0x0078 if x then local x=descriptions[x] if x then @@ -8691,14 +9026,23 @@ local function otftotfm(specification) if duplicates then local nofduplicates,nofduplicated=0,0 for parent,list in next,duplicates do - for i=1,#list do - local unicode=list[i] - if not descriptions[unicode] then - descriptions[unicode]=descriptions[parent] + if type(list)=="table" then + local n=#list + for i=1,n do + local unicode=list[i] + if not 
descriptions[unicode] then + descriptions[unicode]=descriptions[parent] + nofduplicated=nofduplicated+1 + end + end + nofduplicates=nofduplicates+n + else + if not descriptions[list] then + descriptions[list]=descriptions[parent] nofduplicated=nofduplicated+1 end + nofduplicates=nofduplicates+1 end - nofduplicates=nofduplicates+#list end if trace_otf and nofduplicated~=nofduplicates then report_otf("%i extra duplicates copied out of %i",nofduplicated,nofduplicates) @@ -8829,7 +9173,7 @@ if not modules then modules={} end modules ['font-otb']={ } local concat=table.concat local format,gmatch,gsub,find,match,lower,strip=string.format,string.gmatch,string.gsub,string.find,string.match,string.lower,string.strip -local type,next,tonumber,tostring=type,next,tonumber,tostring +local type,next,tonumber,tostring,rawget=type,next,tonumber,tostring,rawget local lpegmatch=lpeg.match local utfchar=utf.char local trace_baseinit=false trackers.register("otf.baseinit",function(v) trace_baseinit=v end) @@ -8876,36 +9220,36 @@ local function gref(descriptions,n) return "" end end -local function cref(feature,lookupname) +local function cref(feature,lookuptags,lookupname) if lookupname then - return formatters["feature %a, lookup %a"](feature,lookupname) + return formatters["feature %a, lookup %a"](feature,lookuptags[lookupname]) else return formatters["feature %a"](feature) end end -local function report_alternate(feature,lookupname,descriptions,unicode,replacement,value,comment) +local function report_alternate(feature,lookuptags,lookupname,descriptions,unicode,replacement,value,comment) report_prepare("%s: base alternate %s => %s (%S => %S)", - cref(feature,lookupname), + cref(feature,lookuptags,lookupname), gref(descriptions,unicode), replacement and gref(descriptions,replacement), value, comment) end -local function report_substitution(feature,lookupname,descriptions,unicode,substitution) +local function 
report_substitution(feature,lookuptags,lookupname,descriptions,unicode,substitution) report_prepare("%s: base substitution %s => %S", - cref(feature,lookupname), + cref(feature,lookuptags,lookupname), gref(descriptions,unicode), gref(descriptions,substitution)) end -local function report_ligature(feature,lookupname,descriptions,unicode,ligature) +local function report_ligature(feature,lookuptags,lookupname,descriptions,unicode,ligature) report_prepare("%s: base ligature %s => %S", - cref(feature,lookupname), + cref(feature,lookuptags,lookupname), gref(descriptions,ligature), gref(descriptions,unicode)) end -local function report_kern(feature,lookupname,descriptions,unicode,otherunicode,value) +local function report_kern(feature,lookuptags,lookupname,descriptions,unicode,otherunicode,value) report_prepare("%s: base kern %s + %s => %S", - cref(feature,lookupname), + cref(feature,lookuptags,lookupname), gref(descriptions,unicode), gref(descriptions,otherunicode), value) @@ -8942,7 +9286,7 @@ local function finalize_ligatures(tfmdata,ligatures) local characters=tfmdata.characters local descriptions=tfmdata.descriptions local resources=tfmdata.resources - local unicodes=resources.unicodes + local unicodes=resources.unicodes local private=resources.private local alldone=false while not alldone do @@ -8978,12 +9322,12 @@ local function finalize_ligatures(tfmdata,ligatures) local secondname=firstname.."_"..secondcode if i==size-1 then target=unicode - if not unicodes[secondname] then + if not rawget(unicodes,secondname) then unicodes[secondname]=unicode end okay=true else - target=unicodes[secondname] + target=rawget(unicodes,secondname) if not target then break end @@ -9019,16 +9363,18 @@ local function finalize_ligatures(tfmdata,ligatures) end end resources.private=private + return true end end local function preparesubstitutions(tfmdata,feature,value,validlookups,lookuplist) local characters=tfmdata.characters local descriptions=tfmdata.descriptions local 
resources=tfmdata.resources + local properties=tfmdata.properties local changed=tfmdata.changed - local unicodes=resources.unicodes local lookuphash=resources.lookuphash local lookuptypes=resources.lookuptypes + local lookuptags=resources.lookuptags local ligatures={} local alternate=tonumber(value) or true and 1 local defaultalt=otf.defaultbasealternate @@ -9036,39 +9382,39 @@ local function preparesubstitutions(tfmdata,feature,value,validlookups,lookuplis local trace_alternatives=trace_baseinit and trace_alternatives local trace_ligatures=trace_baseinit and trace_ligatures local actions={ - substitution=function(lookupdata,lookupname,description,unicode) + substitution=function(lookupdata,lookuptags,lookupname,description,unicode) if trace_singles then - report_substitution(feature,lookupname,descriptions,unicode,lookupdata) + report_substitution(feature,lookuptags,lookupname,descriptions,unicode,lookupdata) end changed[unicode]=lookupdata end, - alternate=function(lookupdata,lookupname,description,unicode) + alternate=function(lookupdata,lookuptags,lookupname,description,unicode) local replacement=lookupdata[alternate] if replacement then changed[unicode]=replacement if trace_alternatives then - report_alternate(feature,lookupname,descriptions,unicode,replacement,value,"normal") + report_alternate(feature,lookuptags,lookupname,descriptions,unicode,replacement,value,"normal") end elseif defaultalt=="first" then replacement=lookupdata[1] changed[unicode]=replacement if trace_alternatives then - report_alternate(feature,lookupname,descriptions,unicode,replacement,value,defaultalt) + report_alternate(feature,lookuptags,lookupname,descriptions,unicode,replacement,value,defaultalt) end elseif defaultalt=="last" then replacement=lookupdata[#data] if trace_alternatives then - report_alternate(feature,lookupname,descriptions,unicode,replacement,value,defaultalt) + report_alternate(feature,lookuptags,lookupname,descriptions,unicode,replacement,value,defaultalt) end else 
if trace_alternatives then - report_alternate(feature,lookupname,descriptions,unicode,replacement,value,"unknown") + report_alternate(feature,lookuptags,lookupname,descriptions,unicode,replacement,value,"unknown") end end end, - ligature=function(lookupdata,lookupname,description,unicode) + ligature=function(lookupdata,lookuptags,lookupname,description,unicode) if trace_ligatures then - report_ligature(feature,lookupname,descriptions,unicode,lookupdata) + report_ligature(feature,lookuptags,lookupname,descriptions,unicode,lookupdata) end ligatures[#ligatures+1]={ unicode,lookupdata } end, @@ -9084,7 +9430,7 @@ local function preparesubstitutions(tfmdata,feature,value,validlookups,lookuplis local lookuptype=lookuptypes[lookupname] local action=actions[lookuptype] if action then - action(lookupdata,lookupname,description,unicode) + action(lookupdata,lookuptags,lookupname,description,unicode) end end end @@ -9099,22 +9445,24 @@ local function preparesubstitutions(tfmdata,feature,value,validlookups,lookuplis local action=actions[lookuptype] if action then for i=1,#lookuplist do - action(lookuplist[i],lookupname,description,unicode) + action(lookuplist[i],lookuptags,lookupname,description,unicode) end end end end end end - finalize_ligatures(tfmdata,ligatures) + properties.hasligatures=finalize_ligatures(tfmdata,ligatures) end local function preparepositionings(tfmdata,feature,value,validlookups,lookuplist) local characters=tfmdata.characters local descriptions=tfmdata.descriptions local resources=tfmdata.resources - local unicodes=resources.unicodes + local properties=tfmdata.properties + local lookuptags=resources.lookuptags local sharedkerns={} local traceindeed=trace_baseinit and trace_kerns + local haskerns=false for unicode,character in next,characters do local description=descriptions[unicode] local rawkerns=description.kerns @@ -9136,13 +9484,13 @@ local function preparepositionings(tfmdata,feature,value,validlookups,lookuplist newkerns={ [otherunicode]=value } 
done=true if traceindeed then - report_kern(feature,lookup,descriptions,unicode,otherunicode,value) + report_kern(feature,lookuptags,lookup,descriptions,unicode,otherunicode,value) end elseif not newkerns[otherunicode] then newkerns[otherunicode]=value done=true if traceindeed then - report_kern(feature,lookup,descriptions,unicode,otherunicode,value) + report_kern(feature,lookuptags,lookup,descriptions,unicode,otherunicode,value) end end end @@ -9151,12 +9499,14 @@ local function preparepositionings(tfmdata,feature,value,validlookups,lookuplist if done then sharedkerns[rawkerns]=newkerns character.kerns=newkerns + haskerns=true else sharedkerns[rawkerns]=false end end end end + properties.haskerns=haskerns end basemethods.independent={ preparesubstitutions=preparesubstitutions, @@ -9182,13 +9532,13 @@ local function make_1(present,tree,name) end end end -local function make_2(present,tfmdata,characters,tree,name,preceding,unicode,done,lookupname) +local function make_2(present,tfmdata,characters,tree,name,preceding,unicode,done,lookuptags,lookupname) for k,v in next,tree do if k=="ligature" then local character=characters[preceding] if not character then if trace_baseinit then - report_prepare("weird ligature in lookup %a, current %C, preceding %C",lookupname,v,preceding) + report_prepare("weird ligature in lookup %a, current %C, preceding %C",lookuptags[lookupname],v,preceding) end character=makefake(tfmdata,name,present) end @@ -9209,7 +9559,7 @@ local function make_2(present,tfmdata,characters,tree,name,preceding,unicode,don else local code=present[name] or unicode local name=name.."_"..k - make_2(present,tfmdata,characters,v,name,code,k,done,lookupname) + make_2(present,tfmdata,characters,v,name,code,k,done,lookuptags,lookupname) end end end @@ -9220,6 +9570,7 @@ local function preparesubstitutions(tfmdata,feature,value,validlookups,lookuplis local changed=tfmdata.changed local lookuphash=resources.lookuphash local lookuptypes=resources.lookuptypes + local 
lookuptags=resources.lookuptags local ligatures={} local alternate=tonumber(value) or true and 1 local defaultalt=otf.defaultbasealternate @@ -9233,7 +9584,7 @@ local function preparesubstitutions(tfmdata,feature,value,validlookups,lookuplis for unicode,data in next,lookupdata do if lookuptype=="substitution" then if trace_singles then - report_substitution(feature,lookupname,descriptions,unicode,data) + report_substitution(feature,lookuptags,lookupname,descriptions,unicode,data) end changed[unicode]=data elseif lookuptype=="alternate" then @@ -9241,28 +9592,28 @@ local function preparesubstitutions(tfmdata,feature,value,validlookups,lookuplis if replacement then changed[unicode]=replacement if trace_alternatives then - report_alternate(feature,lookupname,descriptions,unicode,replacement,value,"normal") + report_alternate(feature,lookuptags,lookupname,descriptions,unicode,replacement,value,"normal") end elseif defaultalt=="first" then replacement=data[1] changed[unicode]=replacement if trace_alternatives then - report_alternate(feature,lookupname,descriptions,unicode,replacement,value,defaultalt) + report_alternate(feature,lookuptags,lookupname,descriptions,unicode,replacement,value,defaultalt) end elseif defaultalt=="last" then replacement=data[#data] if trace_alternatives then - report_alternate(feature,lookupname,descriptions,unicode,replacement,value,defaultalt) + report_alternate(feature,lookuptags,lookupname,descriptions,unicode,replacement,value,defaultalt) end else if trace_alternatives then - report_alternate(feature,lookupname,descriptions,unicode,replacement,value,"unknown") + report_alternate(feature,lookuptags,lookupname,descriptions,unicode,replacement,value,"unknown") end end elseif lookuptype=="ligature" then ligatures[#ligatures+1]={ unicode,data,lookupname } if trace_ligatures then - report_ligature(feature,lookupname,descriptions,unicode,data) + report_ligature(feature,lookuptags,lookupname,descriptions,unicode,data) end end end @@ -9280,7 
+9631,7 @@ local function preparesubstitutions(tfmdata,feature,value,validlookups,lookuplis for i=1,nofligatures do local ligature=ligatures[i] local unicode,tree,lookupname=ligature[1],ligature[2],ligature[3] - make_2(present,tfmdata,characters,tree,"ctx_"..unicode,unicode,unicode,done,lookupname) + make_2(present,tfmdata,characters,tree,"ctx_"..unicode,unicode,unicode,done,lookuptags,lookupname) end end end @@ -9288,7 +9639,9 @@ local function preparepositionings(tfmdata,feature,value,validlookups,lookuplist local characters=tfmdata.characters local descriptions=tfmdata.descriptions local resources=tfmdata.resources + local properties=tfmdata.properties local lookuphash=resources.lookuphash + local lookuptags=resources.lookuptags local traceindeed=trace_baseinit and trace_kerns for l=1,#lookuplist do local lookupname=lookuplist[l] @@ -9304,7 +9657,7 @@ local function preparepositionings(tfmdata,feature,value,validlookups,lookuplist for otherunicode,kern in next,data do if not kerns[otherunicode] and kern~=0 then kerns[otherunicode]=kern - report_kern(feature,lookup,descriptions,unicode,otherunicode,kern) + report_kern(feature,lookuptags,lookup,descriptions,unicode,otherunicode,kern) end end else @@ -10318,6 +10671,7 @@ local currentfont=false local lookuptable=false local anchorlookups=false local lookuptypes=false +local lookuptags=false local handlers={} local rlmode=0 local featurevalue=false @@ -10362,19 +10716,19 @@ local function gref(n) end local function cref(kind,chainname,chainlookupname,lookupname,index) if index then - return formatters["feature %a, chain %a, sub %a, lookup %a, index %a"](kind,chainname,chainlookupname,lookupname,index) + return formatters["feature %a, chain %a, sub %a, lookup %a, index %a"](kind,chainname,chainlookupname,lookuptags[lookupname],index) elseif lookupname then - return formatters["feature %a, chain %a, sub %a, lookup %a"](kind,chainname,chainlookupname,lookupname) + return formatters["feature %a, chain %a, sub %a, lookup 
%a"](kind,chainname,chainlookupname,lookuptags[lookupname]) elseif chainlookupname then - return formatters["feature %a, chain %a, sub %a"](kind,chainname,chainlookupname) + return formatters["feature %a, chain %a, sub %a"](kind,lookuptags[chainname],lookuptags[chainlookupname]) elseif chainname then - return formatters["feature %a, chain %a"](kind,chainname) + return formatters["feature %a, chain %a"](kind,lookuptags[chainname]) else return formatters["feature %a"](kind) end end local function pref(kind,lookupname) - return formatters["feature %a, lookup %a"](kind,lookupname) + return formatters["feature %a, lookup %a"](kind,lookuptags[lookupname]) end local function copy_glyph(g) local components=g.components @@ -11728,7 +12082,7 @@ local function normal_handle_contextchain(head,start,kind,chainname,contexts,seq end else local i=1 - repeat + while true do if skipped then while true do local char=start.char @@ -11765,11 +12119,13 @@ local function normal_handle_contextchain(head,start,kind,chainname,contexts,seq end end end - if start then + if i>nofchainlookups then + break + elseif start then start=start.next else end - until i>nofchainlookups + end end else local replacements=ck[7] @@ -11910,6 +12266,7 @@ local function featuresprocessor(head,font,attr) anchorlookups=resources.lookup_to_anchor lookuptable=resources.lookups lookuptypes=resources.lookuptypes + lookuptags=resources.lookuptags currentfont=font rlmode=0 local sequences=resources.sequences @@ -12441,6 +12798,7 @@ local function prepare_contextchains(tfmdata) local rawdata=tfmdata.shared.rawdata local resources=rawdata.resources local lookuphash=resources.lookuphash + local lookuptags=resources.lookuptags local lookups=rawdata.lookups if lookups then for lookupname,lookupdata in next,rawdata.lookups do @@ -12453,7 +12811,7 @@ local function prepare_contextchains(tfmdata) if not validformat then report_prepare("unsupported format %a",format) elseif not validformat[lookuptype] then - 
report_prepare("unsupported format %a, lookuptype %a, lookupname %a",format,lookuptype,lookupname) + report_prepare("unsupported format %a, lookuptype %a, lookupname %a",format,lookuptype,lookuptags[lookupname]) else local contexts=lookuphash[lookupname] if not contexts then @@ -12502,7 +12860,7 @@ local function prepare_contextchains(tfmdata) else end else - report_prepare("missing lookuptype for lookupname %a",lookupname) + report_prepare("missing lookuptype for lookupname %a",lookuptags[lookupname]) end end end @@ -13374,6 +13732,7 @@ if otf.enhancers.register then otf.enhancers.register("unpack",unpackdata) end otf.enhancers.unpack=unpackdata +otf.enhancers.pack=packdata end -- closure diff --git a/tex/generic/context/luatex/luatex-fonts-otn.lua b/tex/generic/context/luatex/luatex-fonts-otn.lua index 068f0a9b9..831b23350 100644 --- a/tex/generic/context/luatex/luatex-fonts-otn.lua +++ b/tex/generic/context/luatex/luatex-fonts-otn.lua @@ -252,6 +252,7 @@ local currentfont = false local lookuptable = false local anchorlookups = false local lookuptypes = false +local lookuptags = false local handlers = { } local rlmode = 0 local featurevalue = false @@ -306,20 +307,20 @@ end local function cref(kind,chainname,chainlookupname,lookupname,index) -- not in the mood to alias f_ if index then - return formatters["feature %a, chain %a, sub %a, lookup %a, index %a"](kind,chainname,chainlookupname,lookupname,index) + return formatters["feature %a, chain %a, sub %a, lookup %a, index %a"](kind,chainname,chainlookupname,lookuptags[lookupname],index) elseif lookupname then - return formatters["feature %a, chain %a, sub %a, lookup %a"](kind,chainname,chainlookupname,lookupname) + return formatters["feature %a, chain %a, sub %a, lookup %a"](kind,chainname,chainlookupname,lookuptags[lookupname]) elseif chainlookupname then - return formatters["feature %a, chain %a, sub %a"](kind,chainname,chainlookupname) + return formatters["feature %a, chain %a, sub 
%a"](kind,lookuptags[chainname],lookuptags[chainlookupname]) elseif chainname then - return formatters["feature %a, chain %a"](kind,chainname) + return formatters["feature %a, chain %a"](kind,lookuptags[chainname]) else return formatters["feature %a"](kind) end end local function pref(kind,lookupname) - return formatters["feature %a, lookup %a"](kind,lookupname) + return formatters["feature %a, lookup %a"](kind,lookuptags[lookupname]) end -- We can assume that languages that use marks are not hyphenated. We can also assume @@ -1896,7 +1897,7 @@ local function normal_handle_contextchain(head,start,kind,chainname,contexts,seq end else local i = 1 - repeat + while true do if skipped then while true do local char = start.char @@ -1937,12 +1938,14 @@ local function normal_handle_contextchain(head,start,kind,chainname,contexts,seq end end end - if start then + if i > nofchainlookups then + break + elseif start then start = start.next else -- weird end - until i > nofchainlookups + end end else local replacements = ck[7] @@ -2139,6 +2142,7 @@ local function featuresprocessor(head,font,attr) anchorlookups = resources.lookup_to_anchor lookuptable = resources.lookups lookuptypes = resources.lookuptypes + lookuptags = resources.lookuptags currentfont = font rlmode = 0 @@ -2734,6 +2738,7 @@ local function prepare_contextchains(tfmdata) local rawdata = tfmdata.shared.rawdata local resources = rawdata.resources local lookuphash = resources.lookuphash + local lookuptags = resources.lookuptags local lookups = rawdata.lookups if lookups then for lookupname, lookupdata in next, rawdata.lookups do @@ -2747,7 +2752,7 @@ local function prepare_contextchains(tfmdata) report_prepare("unsupported format %a",format) elseif not validformat[lookuptype] then -- todo: dejavu-serif has one (but i need to see what use it has) - report_prepare("unsupported format %a, lookuptype %a, lookupname %a",format,lookuptype,lookupname) + report_prepare("unsupported format %a, lookuptype %a, lookupname 
%a",format,lookuptype,lookuptags[lookupname]) else local contexts = lookuphash[lookupname] if not contexts then @@ -2803,7 +2808,7 @@ local function prepare_contextchains(tfmdata) -- no rules end else - report_prepare("missing lookuptype for lookupname %a",lookupname) + report_prepare("missing lookuptype for lookupname %a",lookuptags[lookupname]) end end end -- cgit v1.2.3