if not modules then modules = { } end modules ['spac-chr'] = { version = 1.001, comment = "companion to spac-chr.mkiv", author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", copyright = "PRAGMA ADE / ConTeXt Development Team", license = "see context related readme files" } local byte, lower = string.byte, string.lower -- beware: attribute copying is bugged ... there will be a proper luatex helper -- for this -- to be redone: characters will become tagged spaces instead as then we keep track of -- spaceskip etc -- todo: only setattr when export / use properties local next = next local trace_characters = false trackers.register("typesetters.characters", function(v) trace_characters = v end) local trace_nbsp = false trackers.register("typesetters.nbsp", function(v) trace_nbsp = v end) local report_characters = logs.reporter("typesetting","characters") local nodes, node = nodes, node local nuts = nodes.nuts local tonode = nuts.tonode local tonut = nuts.tonut local getnext = nuts.getnext local getprev = nuts.getprev local getattr = nuts.getattr local setattr = nuts.setattr local getlang = nuts.getlang local setchar = nuts.setchar local setattrlist = nuts.setattrlist local getfont = nuts.getfont local getchar = nuts.getchar local setsubtype = nuts.setsubtype local setdisc = nuts.setdisc local isglyph = nuts.isglyph local setcolor = nodes.tracers.colors.set local insert_node_before = nuts.insert_before local insert_node_after = nuts.insert_after local remove_node = nuts.remove local traverse_id = nuts.traverse_id local traverse_char = nuts.traverse_char local copy_node = nuts.copy local tasks = nodes.tasks local nodepool = nuts.pool local new_penalty = nodepool.penalty local new_glue = nodepool.glue local new_kern = nodepool.kern local new_rule = nodepool.rule local new_disc = nodepool.disc local nodecodes = nodes.nodecodes local skipcodes = nodes.skipcodes local disccodes = nodes.disccodes local glyph_code = nodecodes.glyph local space_skip_code = skipcodes.spaceskip local explicit_code = disccodes.explicit local chardata = characters.data local is_punctuation = characters.is_punctuation local typesetters = typesetters local unicodeblocks = characters.blocks local characters = typesetters.characters or { } -- can be predefined typesetters.characters = characters local fonthashes = fonts.hashes local fontparameters = fonthashes.parameters local fontcharacters = fonthashes.characters local fontquads = fonthashes.quads local setmetatableindex = table.setmetatableindex local a_character = attributes.private("characters") local a_alignstate = attributes.private("alignstate") local c_zero = byte('0') local c_period = byte('.') local function inject_quad_space(unicode,head,current,fraction) if fraction ~= 0 then fraction = fraction * fontquads[getfont(current)] end local glue = new_glue(fraction) setattrlist(glue,current) setattrlist(current) -- why reset all setattr(glue,a_character,unicode) head, current = insert_node_after(head,current,glue) return head, current end local function inject_char_space(unicode,head,current,parent) local font = getfont(current) local char = fontcharacters[font][parent] local glue = new_glue(char and char.width or fontparameters[font].space) setattrlist(glue,current) setattrlist(current) -- why reset all setattr(glue,a_character,unicode) head, current = insert_node_after(head,current,glue) return head, current end local function inject_nobreak_space(unicode,head,current,space,spacestretch,spaceshrink) local glue = new_glue(space,spacestretch,spaceshrink) local penalty = new_penalty(10000) setattrlist(glue,current) setattrlist(current) -- why reset all setattr(glue,a_character,unicode) -- bombs head, current = insert_node_after(head,current,penalty) if trace_nbsp then local rule = new_rule(space) local kern = new_kern(-space) local penalty = new_penalty(10000) setcolor(rule,"orange") head, current = insert_node_after(head,current,rule) head, current = insert_node_after(head,current,kern) head, current = insert_node_after(head,current,penalty) end head, current = insert_node_after(head,current,glue) return head, current end local function nbsp(head,current) local para = fontparameters[getfont(current)] if getattr(current,a_alignstate) == 1 then -- flushright head, current = inject_nobreak_space(0x00A0,head,current,para.space,0,0) setsubtype(current,space_skip_code) else head, current = inject_nobreak_space(0x00A0,head,current,para.space,para.spacestretch,para.spaceshrink) setsubtype(current,space_skip_code) end return head, current end -- assumes nuts or nodes, depending on callers .. so no tonuts here function characters.replacenbsp(head,original) local head, current = nbsp(head,original) head = remove_node(head,original,true) return head, current end -- function characters.replacenbspaces(head) -- for current in traverse_id(glyph_code,head) do -- if getchar(current) == 0x00A0 then -- local h = nbsp(head,current) -- if h then -- head = remove_node(h,current,true) -- end -- end -- end -- return head -- end function characters.replacenbspaces(head) local head = tonut(head) local wipe = false for current in traverse_id(glyph_code,head) do -- can be anytiem so no traverse_char if getchar(current) == 0x00A0 then if wipe then head = remove_node(h,current,true) wipe = false end local h = nbsp(head,current) if h then wipe = current end end end if wipe then head = remove_node(h,current,true) end return tonode(head) end -- This initialization might move someplace else if we need more of it. The problem is that -- this module depends on fonts so we have an order problem. local nbsphash = { } setmetatableindex(nbsphash,function(t,k) for i=unicodeblocks.devanagari.first,unicodeblocks.devanagari.last do nbsphash[i] = true end for i=unicodeblocks.kannada .first,unicodeblocks.kannada .last do nbsphash[i] = true end setmetatableindex(nbsphash,nil) return nbsphash[k] end) local methods = { -- The next one uses an attribute assigned to the character but still we -- don't have the 'local' value. -- maybe also 0x0008 : backspace [0x001F] = function(head,current) -- kind of special local next = getnext(current) if next then local char = isglyph(next) if char then head, current = remove_node(head,current,true) if not is_punctuation[char] then local p = fontparameters[getfont(next)] head, current = insert_node_before(head,current,new_glue(p.space,p.space_stretch,p.space_shrink)) end end end end, [0x00A0] = function(head,current) -- nbsp local next = getnext(current) if next then local char = isglyph(current) if not char then -- move on elseif char == 0x200C or char == 0x200D then -- nzwj zwj next = getnext(next) if next then char = isglyph(next) if char and nbsphash[char] then return false end end elseif nbsphash[char] then return false end end local prev = getprev(current) if prev then local char = isglyph(prev) if char and nbsphash[char] then return false end end return nbsp(head,current) end, [0x00AD] = function(head,current) -- softhyphen return insert_node_after(head,current,languages.explicithyphen(current)) end, [0x2000] = function(head,current) -- enquad return inject_quad_space(0x2000,head,current,1/2) end, [0x2001] = function(head,current) -- emquad return inject_quad_space(0x2001,head,current,1) end, [0x2002] = function(head,current) -- enspace return inject_quad_space(0x2002,head,current,1/2) end, [0x2003] = function(head,current) -- emspace return inject_quad_space(0x2003,head,current,1) end, [0x2004] = function(head,current) -- threeperemspace return inject_quad_space(0x2004,head,current,1/3) end, [0x2005] = function(head,current) -- fourperemspace return inject_quad_space(0x2005,head,current,1/4) end, [0x2006] = function(head,current) -- sixperemspace return inject_quad_space(0x2006,head,current,1/6) end, [0x2007] = function(head,current) -- figurespace return inject_char_space(0x2007,head,current,c_zero) end, [0x2008] = function(head,current) -- punctuationspace return inject_char_space(0x2008,head,current,c_period) end, [0x2009] = function(head,current) -- breakablethinspace return inject_quad_space(0x2009,head,current,1/8) -- same as next end, [0x200A] = function(head,current) -- hairspace return inject_quad_space(0x200A,head,current,1/8) -- same as previous (todo) end, [0x200B] = function(head,current) -- zerowidthspace return inject_quad_space(0x200B,head,current,0) end, [0x202F] = function(head,current) -- narrownobreakspace return inject_nobreak_space(0x202F,head,current,fontquads[getfont(current)]/8) end, [0x205F] = function(head,current) -- math thinspace return inject_nobreak_space(0x205F,head,current,4*fontquads[getfont(current)]/18) end, -- The next one is also a bom so maybe only when we have glyphs around it -- [0xFEFF] = function(head,current) -- zerowidthnobreakspace -- return head, current -- end, } characters.methods = methods -- function characters.handler(head) -- todo: use traverse_id -- head = tonut(head) -- local current = head -- local done = false -- while current do -- local char, id = isglyph(current) -- if char then -- local next = getnext(current) -- local method = methods[char] -- if method then -- if trace_characters then -- report_characters("replacing character %C, description %a",char,lower(chardata[char].description)) -- end -- local h = method(head,current) -- if h then -- head = remove_node(h,current,true) -- end -- done = true -- end -- current = next -- else -- current = getnext(current) -- end -- end -- return tonode(head), done -- end -- for current, char, font in traverse_char_data(head) will save 0.015 on a 300 page doc -- this also works ok in math as we run over glyphs and these stay glyphs ... not sure -- about scripts and such but that is not important anyway ... some day we can consider -- special definitions in math function characters.handler(head) local head = tonut(head) local wipe = false for current in traverse_char(head) do local char = getchar(current) if char then local method = methods[char] if method then if wipe then head = remove_node(head,wipe,true) wipe = false end if trace_characters then report_characters("replacing character %C, description %a",char,lower(chardata[char].description)) end local h = method(head,current) if h then wipe = current end done = true end end end if wipe then head = remove_node(head,wipe,true) end return tonode(head), done end