summaryrefslogtreecommitdiff
path: root/tex/context/base/mkxl/lang-rep.lmt
diff options
context:
space:
mode:
Diffstat (limited to 'tex/context/base/mkxl/lang-rep.lmt')
-rw-r--r--tex/context/base/mkxl/lang-rep.lmt467
1 files changed, 467 insertions, 0 deletions
diff --git a/tex/context/base/mkxl/lang-rep.lmt b/tex/context/base/mkxl/lang-rep.lmt
new file mode 100644
index 000000000..6139a03f7
--- /dev/null
+++ b/tex/context/base/mkxl/lang-rep.lmt
@@ -0,0 +1,467 @@
+if not modules then modules = { } end modules ['lang-rep'] = { -- creates the global modules table when needed and stores this file's metadata in it
+ version = 1.001,
+ comment = "companion to lang-rep.mkiv",
+ author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
+ copyright = "PRAGMA ADE / ConTeXt Development Team",
+ license = "see context related readme files"
+}
+
+-- A BachoTeX 2013 experiment, probably not that useful. Eventually I used a simpler
+-- more generic example. I'm sure no one ever notices or even needs this code.
+--
+-- As a follow up on a question by Alan about special treatment of dropped caps I wonder
+-- if I can make this one more clever (probably in a few more dev steps). For instance
+-- injecting nodes or replacing nodes. It's a prelude to a kind of lpeg for nodes,
+-- although (given experiences so far) we don't really need that. After all, each problem
+-- is somewhat unique.
+
+local type, tonumber, next = type, tonumber, next
+local gmatch, gsub = string.gmatch, string.gsub
+local utfbyte, utfsplit = utf.byte, utf.split
+local P, C, U, Cc, Ct, Cs, lpegmatch = lpeg.P, lpeg.C, lpeg.patterns.utf8character, lpeg.Cc, lpeg.Ct, lpeg.Cs, lpeg.match
+local find = string.find
+
+local zwnj = 0x200C -- code point of the zero width non joiner
+local grouped = P("{") * ( Ct((U/utfbyte-P("}"))^1) + Cc(false) ) * P("}") -- one {...} group: a table of character codes, or false when the group is empty
+local splitter = Ct(( -- turns a replacement string into a list of codes and method tables
+ #P("{") * (
+ P("{}") / function() return zwnj end -- an empty group on its own becomes a zwnj code
+ + Ct(Cc("discretionary") * grouped * grouped * grouped) -- three groups, read by replace as pre, post and replace
+ + Ct(Cc("noligature") * grouped) -- a single group
+ )
+ + U/utfbyte -- any other character is stored as its code
+ )^1)
+
+local stripper = P("{") * Cs((1-P(-2))^0) * P("}") * P(-1) -- captures what sits between a leading "{" and a "}" that ends the string
+
+local trace_replacements = false trackers.register("languages.replacements", function(v) trace_replacements = v end) -- flag is set by the registered callback
+local trace_details = false trackers.register("languages.replacements.details", function(v) trace_details = v end) -- set the same way; not read in the code shown here
+
+local report_replacement = logs.reporter("languages","replacements") -- used for the trace and error messages below
+
+local glyph_code = nodes.nodecodes.glyph
+local glue_code = nodes.nodecodes.glue
+
+local spaceskip_code = nodes.gluecodes.spaceskip
+local xspaceskip_code = nodes.gluecodes.xspaceskip
+
+local nuts = nodes.nuts
+
+local getnext = nuts.getnext
+local getprev = nuts.getprev
+local getattr = nuts.getattr
+local getid = nuts.getid
+local getsubtype = nuts.getsubtype
+local getchar = nuts.getchar
+local isglyph = nuts.isglyph
+
+local setlink = nuts.setlink
+local setnext = nuts.setnext
+local setprev = nuts.setprev
+local setchar = nuts.setchar
+local setattrlist = nuts.setattrlist
+local setoptions = nuts.setoptions
+
+local glyphoptioncodes = tex.glyphoptioncodes
+local norightligature_option = glyphoptioncodes.norightligature
+local noleftligature_option = glyphoptioncodes.noleftligature
+
+local insertbefore = nuts.insertbefore
+local insertafter = nuts.insertafter
+local remove_node = nuts.remove
+local copy_node = nuts.copy
+local flushlist = nuts.flushlist
+
+local nodepool = nuts.pool
+local new_disc = nodepool.disc
+
+local texsetattribute = tex.setattribute
+local unsetvalue = attributes.unsetvalue
+
+local enableaction = nodes.tasks.enableaction
+
+local v_reset = interfaces.variables.reset
+
+local implement = interfaces.implement
+
+local processors = typesetters.processors
+local splitprocessor = processors.split
+
+local replacements = languages.replacements or { } -- reuse an existing namespace table when there is one
+languages.replacements = replacements
+
+local a_replacements = attributes.private("replacements") -- attribute whose value selects a tree in the handler
+
+local lists = { } -- category data, reachable by name and by attribute number
+local last = 0 -- last attribute number handed out
+local trees = { } -- attribute number -> character tree
+
+table.setmetatableindex(lists,function(lists,name) -- creates a category on first access (presumably installed as an __index function)
+ last = last + 1
+ local list = { }
+ local data = { name = name, list = list, attribute = last }
+ lists[last] = data
+ lists[name] = data
+ trees[last] = list
+ return data
+end)
+
+lists[v_reset].attribute = unsetvalue -- so we discard 0 (this access also creates the reset category)
+
+
+-- todo: glue kern attr
+
+local function add(root,word,replacement) -- stores word, character by character, in the tree at root
+    local processor, stripped = splitprocessor(replacement,true) -- no check
+    stripped = lpegmatch(stripper,stripped) or stripped -- without enclosing braces when the stripper matches
+    local chars = utfsplit(word) -- ,true)
+    local length = #chars
+    for index=1,length do
+        local code = utfbyte(chars[index])
+        local branch = root[code]
+        if not branch then
+            branch = { }
+            root[code] = branch
+        end
+        root = branch
+    end
+    if length > 0 then -- the branch of the last character carries the replacement data
+        root.final = {
+            word = word,
+            replacement = stripped,
+            processor = processor,
+            oldlength = length,
+            newcodes = lpegmatch(splitter,stripped),
+            special = find(stripped,"{",1,true),
+        }
+    end
+end
+
+function replacements.add(category,word,replacement) -- word: one string, or a table of word = replacement pairs
+    local tree = lists[category].list
+    if type(word) ~= "table" then
+        add(tree,word,replacement or "") -- a missing replacement becomes the empty string
+    else
+        for old, new in next, word do
+            add(tree,old,new)
+        end
+    end
+end
+
+-- local strip = lpeg.stripper("{}")
+
+function languages.replacements.addlist(category,list) -- list: a string of space separated entries or an array of entries
+    local tree = lists[category].list
+    if type(list) ~= "string" then
+        for index=1,#list do
+            local entry = list[index]
+            local word = gsub(entry,"[{}]","") -- the word to look for is the entry without braces
+            -- local word = lpegmatch(strip,entry)
+            add(tree,word,entry)
+        end
+    else
+        for entry in gmatch(list,"%S+") do
+            local word = gsub(entry,"[{}]","") -- the word to look for is the entry without braces
+            -- local word = lpegmatch(strip,entry)
+            add(tree,word,entry)
+        end
+    end
+end
+
+local function tonodes(list,template) -- builds a node list with one copy of template per code in list
+    local first, tail = nil, nil
+    for index=1,#list do
+        local glyph = copy_node(template)
+        setchar(glyph,list[index])
+        if not first then
+            first, tail = glyph, glyph -- the first copy starts the list
+        else
+            first, tail = insertafter(first,tail,glyph)
+        end
+    end
+    return first -- nil when list has no entries
+end
+
+local is_punctuation = characters.is_punctuation
+
+-- We can try to be clever and use the fact that there is no match to skip
+-- over to the next word but it gives fuzzy code so for now I removed
+-- that optimization (when I really need a high performance version myself
+-- I will look into it, but so far I never used this mechanism myself).
+--
+-- We used to have the hit checker as a function but it got messy when checks
+-- for punctuation were added.
+
+-- Replaces the node range first..last in head by the codes of final (an entry made by add).
+-- When hasspace or final.special is set the range is deleted and rebuilt; otherwise glyphs are
+-- reused, copied or removed depending on the old and new length. An overload function, if
+-- given, gets final plus the first and last node of the result. Returns head and the node after last.
+local function replace(head,first,last,final,hasspace,overload)
+    local current = first
+    local prefirst = getprev(first) -- nil when nothing precedes first
+    local postlast = getnext(last)
+    local oldlength = final.oldlength
+    local newcodes = final.newcodes
+    local newlength = newcodes and #newcodes or 0
+    if trace_replacements then
+        report_replacement("replacing word %a by %a",final.word,final.replacement)
+    end
+    if hasspace or final.special then
+        -- It's easier to delete and insert so we do just that. On the todo list is turn injected
+        -- spaces into glue but easier might be to let the char break handler do that ...
+        local prev = getprev(current)
+        local next = getnext(last)
+        local list = current
+        setnext(last)
+        setlink(prev,next)
+        current = prev
+        if not current then
+            head = nil -- NOTE(review): without a prev the nodes inserted below are not explicitly relinked to next - verify
+        end
+        local i = 1
+        while i <= newlength do
+            local codes = newcodes[i]
+            if type(codes) == "table" then
+                local method = codes[1]
+                if method == "discretionary" then
+                    local pre, post, replace = codes[2], codes[3], codes[4]
+                    if pre then
+                        pre = tonodes(pre,first)
+                    end
+                    if post then
+                        post = tonodes(post,first)
+                    end
+                    if replace then
+                        replace = tonodes(replace,first)
+                    end
+                    -- todo: also set attr
+                    local new = new_disc(pre,post,replace)
+                    setattrlist(new,first)
+                    head, current = insertafter(head,current,new)
+                elseif method == "noligature" then
+                    -- not that efficient to copy but ok for testing
+                    local list = codes[2]
+                    if list then
+                        local n = #list
+                        for i=1,n do
+                            local new = copy_node(first)
+                            setchar(new,list[i])
+                            if i == 1 then
+                                setoptions(new,norightligature_option)
+                            elseif i == n then
+                                setoptions(new,noleftligature_option | norightligature_option)
+                            else
+                                setoptions(new,noleftligature_option)
+                            end
+                            head, current = insertafter(head,current,new)
+                        end
+                    else
+                        -- alternative: insert a copy of first with setchar(new,zwnj) instead
+                        setoptions(current,norightligature_option)
+                    end
+                else
+                    report_replacement("unknown method %a",method or "?")
+                end
+            else
+                local new = copy_node(first)
+                setchar(new,codes)
+                head, current = insertafter(head,current,new)
+            end
+            i = i + 1
+        end
+        flushlist(list) -- the old range is only freed here because first served as template above
+    elseif newlength == 0 then
+        -- we overload
+    elseif oldlength == newlength then
+        if final.word ~= final.replacement then
+            for i=1,newlength do
+                setchar(current,newcodes[i])
+                current = getnext(current)
+            end
+        end
+    elseif oldlength < newlength then
+        for i=1,newlength-oldlength do
+            local n = copy_node(current)
+            setchar(n,newcodes[i])
+            head, current = insertbefore(head,current,n)
+            current = getnext(current)
+        end
+        for i=newlength-oldlength+1,newlength do
+            setchar(current,newcodes[i])
+            current = getnext(current)
+        end
+    else
+        for i=1,oldlength-newlength do
+            head, current = remove_node(head,current,true)
+        end
+        for i=1,newlength do
+            setchar(current,newcodes[i])
+            current = getnext(current)
+        end
+    end
+    if overload then
+        overload(final,prefirst and getnext(prefirst) or head,getprev(postlast)) -- no predecessor: the result starts at head
+    end
+    return head, postlast
+end
+
+-- we handle just one space
+
+function replacements.handler(head) -- scans the list at head for words stored in the selected tree, replaces them and returns the head
+ local current = head
+ local overload = attributes.applyoverloads -- handed to replace, which calls it after a replacement
+ local mode = false -- we're in word or punctuation mode
+ local wordstart = false -- first node of the pending match
+ local wordend = false -- last node of the pending match
+ local prevend = false -- end of the match as it was before a space got consumed
+ local prevfinal = false -- final entry that goes with prevend
+ local tree = false -- tree selected by the attribute of the current glyph
+ local root = false -- position in the tree while a match is pending
+ local hasspace = false -- the pending match includes a space glue
+ while current do
+ local id = getid(current) -- or use the char getter
+ if id == glyph_code then
+ local a = getattr(current,a_replacements)
+ if a then
+ -- we have a run
+ tree = trees[a]
+ if tree then
+ local char = getchar(current)
+ local punc = is_punctuation[char]
+ if mode == "punc" then
+ if not punc then -- a punctuation run ends here: flush what is pending
+ if root then
+ local final = root.final
+ if final then
+ head = replace(head,wordstart,wordend,final,hasspace,overload)
+ elseif prevfinal then
+ head = replace(head,wordstart,prevend,prevfinal,hasspace,overload)
+ end
+ prevfinal = false
+ root = false
+ end
+ mode = "word"
+ end
+ elseif mode == "word" then
+ if punc then -- a word run ends here: flush what is pending
+ if root then
+ local final = root.final
+ if final then
+ head = replace(head,wordstart,wordend,final,hasspace,overload)
+ elseif prevfinal then
+ head = replace(head,wordstart,prevend,prevfinal,hasspace,overload)
+ end
+ prevfinal = false
+ root = false
+ end
+ mode = "punc"
+ end
+ else
+ mode = punc and "punc" or "word" -- first glyph seen: pick the initial mode
+ end
+ if root then -- a match is pending: try to extend it with this character
+ root = root[char]
+ if root then
+ wordend = current
+ end
+ else -- nothing pending: use a remembered match, then try to start a new one here
+ if prevfinal then
+ head = replace(head,wordstart,prevend,prevfinal,hasspace,overload)
+ prevfinal = false
+ end
+ root = tree[char]
+ if root then
+ wordstart = current
+ wordend = current
+ prevend = false
+ hasspace = false
+ end
+ end
+ else
+ root= false
+ end
+ else
+ tree = false
+ end
+ current = getnext(current)
+ elseif root then -- a non glyph node while a match is pending
+ local final = root.final
+ if mode == "word" and id == glue_code then
+ local s = getsubtype(current)
+ if s == spaceskip_code or s == xspaceskip_code then
+ local r = root[32] -- 32 is the code of a space in a stored word; maybe more types
+ if r then
+ if not prevend then -- remember a complete match up to here as fallback
+ local f = root.final
+ if f then
+ prevend = wordend
+ prevfinal = f
+ end
+ end
+ wordend = current
+ root = r
+ hasspace = true
+ goto moveon -- the space belongs to the match, so nothing is flushed
+ end
+ end
+ end
+ if final then
+ head, current = replace(head,wordstart,wordend,final,hasspace,overload) -- current becomes the node that followed the replaced range
+ elseif prevfinal then
+ head, current = replace(head,wordstart,prevend,prevfinal,hasspace,overload)
+ end
+ prevfinal = false
+ root = false
+ ::moveon::
+ current = getnext(current)
+ else
+ current = getnext(current)
+ end
+ end
+ if root then -- flush a match that runs up to the end of the list
+ local final = root.final
+ if final then
+ head = replace(head,wordstart,wordend,final,hasspace,overload)
+ elseif prevfinal then
+ head = replace(head,wordstart,prevend,prevfinal,hasspace,overload)
+ end
+ end
+ return head
+end
+
+local enabled = false
+
+function replacements.set(n) -- n: a category name, or the reset keyword to unset the attribute
+    local attribute = unsetvalue
+    if n ~= v_reset then
+        attribute = lists[n].attribute
+        if not enabled then
+            -- the handler is only enabled once, at the first real use
+            enableaction("processors","languages.replacements.handler")
+            if trace_replacements then
+                report_replacement("enabling replacement handler")
+            end
+            enabled = true
+        end
+    end
+    texsetattribute(a_replacements,attribute)
+end
+
+-- interface
+
+implement { -- registers replacements.set under this name (presumably for use from the TeX side - confirm)
+ name = "setreplacements",
+ actions = replacements.set,
+ arguments = "string" -- the category name or the reset keyword
+}
+
+implement {
+ name = "addreplacements",
+ actions = replacements.add,
+ arguments = "3 strings", -- category, word, replacement
+}
+
+implement {
+ name = "addreplacementslist",
+ actions = replacements.addlist,
+ arguments = "2 strings", -- category, list
+}