From 85b7bc695629926641c7cb752fd478adfdf374f3 Mon Sep 17 00:00:00 2001 From: Marius Date: Sun, 4 Jul 2010 15:32:09 +0300 Subject: stable 2010-05-24 13:10 --- tex/context/base/scrp-ini.lua | 388 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 388 insertions(+) create mode 100644 tex/context/base/scrp-ini.lua (limited to 'tex/context/base/scrp-ini.lua') diff --git a/tex/context/base/scrp-ini.lua b/tex/context/base/scrp-ini.lua new file mode 100644 index 000000000..b28c297d0 --- /dev/null +++ b/tex/context/base/scrp-ini.lua @@ -0,0 +1,388 @@ +if not modules then modules = { } end modules ['scrp-ini'] = { + version = 1.001, + comment = "companion to scrp-ini.mkiv", + author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", + copyright = "PRAGMA ADE / ConTeXt Development Team", + license = "see context related readme files" +} + +local trace_analyzing = false trackers.register("scripts.analyzing", function(v) trace_analyzing = v end) +local trace_injections = false trackers.register("scripts.injections", function(v) trace_injections = v end) + +local set_attribute = node.set_attribute +local has_attribute = node.has_attribute +local first_character = node.first_character +local traverse_id = node.traverse_id + +local glyph = node.id('glyph') +local glue = node.id('glue') +local penalty = node.id('penalty') + +local fcs = (fonts.color and fonts.color.set) or function() end +local fcr = (fonts.color and fonts.color.reset) or function() end + +local state = attributes.private('state') +local preproc = attributes.private('preproc') +local prestat = attributes.private('prestat') + +local fontdata = fonts.ids + +scripts = scripts or { } +scripts.handlers = scripts.handlers or { } + +scripts.names = scripts.names or { } +scripts.numbers = scripts.numbers or { } +scripts.hash = scripts.hash or { } + +storage.register("scripts/hash", scripts.hash, "scripts.hash") + +if not next(scripts.hash) then + + local hash = { + -- + -- half width opening parenthesis + [0x0028] = "half_width_open", + [0x005B] = "half_width_open", + [0x007B] = "half_width_open", + [0x2018] = "half_width_open", -- ‘ + [0x201C] = "half_width_open", -- “ + -- + -- full width opening parenthesis + -- + [0x3008] = "full_width_open", -- 〈 Left book quote + [0x300A] = "full_width_open", -- 《 Left double book quote + [0x300C] = "full_width_open", -- 「 left quote + [0x300E] = "full_width_open", -- 『 left double quote + [0x3010] = "full_width_open", -- 【 left double book quote + [0x3014] = "full_width_open", -- 〔 left book quote + [0x3016] = "full_width_open", --〖 left double book quote + [0x3018] = "full_width_open", -- left tortoise bracket + [0x301A] = "full_width_open", -- left square bracket + [0x301D] = "full_width_open", -- reverse double prime qm + [0xFF08] = "full_width_open", -- ( left parenthesis + [0xFF3B] = "full_width_open", -- [ left square brackets + [0xFF5B] = "full_width_open", -- { left curve bracket + [0xFF62] = "full_width_open", -- left corner bracket + -- + -- half width closing parenthesis + [0x0029] = "half_width_close", + [0x005D] = "half_width_close", + [0x007D] = "half_width_close", + [0x2019] = "half_width_close", -- ’ right quote, right + [0x201D] = "half_width_close", -- ” right double quote + -- + -- full width closing parenthesis + -- + [0x3009] = "full_width_close", -- 〉 book quote + [0x300B] = "full_width_close", -- 》 double book quote + [0x300D] = "full_width_close", -- 」 right quote, right + [0x300F] = "full_width_close", -- 』 right double quote + [0x3011] = "full_width_close", -- 】 right double book quote + [0x3015] = "full_width_close", -- 〕 right book quote + [0x3017] = "full_width_close", -- 〗 right double book quote + [0x3019] = "full_width_close", -- right tortoise bracket + [0x301B] = "full_width_close", -- right square bracket + [0x301E] = "full_width_close", -- double prime qm + [0x301F] = "full_width_close", -- low double prime qm + [0xFF09] = "full_width_close", -- ) right parenthesis + [0xFF3D] = "full_width_close", -- ] right square brackets + [0xFF5D] = "full_width_close", -- } right curve brackets + [0xFF63] = "full_width_close", -- right corner bracket + -- + -- vertical opening vertical + -- + -- 0xFE35, 0xFE37, 0xFE39, 0xFE3B, 0xFE3D, 0xFE3F, 0xFE41, 0xFE43, 0xFE47, + -- + -- vertical closing + -- + -- 0xFE36, 0xFE38, 0xFE3A, 0xFE3C, 0xFE3E, 0xFE40, 0xFE42, 0xFE44, 0xFE48, + -- + -- half width opening punctuation + -- + -- + -- + -- full width opening punctuation + -- + -- 0x2236, -- ∶ + -- 0xFF0C, -- , + -- + -- half width closing punctuation_hw + -- + [0x0021] = "half_width_close", -- ! + [0x002C] = "half_width_close", -- , + [0x002E] = "half_width_close", -- . + [0x003A] = "half_width_close", -- : + [0x003B] = "half_width_close", -- ; + [0x003F] = "half_width_close", -- ? + [0xFF61] = "half_width_close", -- hw full stop + -- + -- full width closing punctuation + [0x3001] = "full_width_close", -- 、 + [0x3002] = "full_width_close", -- 。 + [0xFF01] = "full_width_close", -- ! + [0xFF0C] = "full_width_close", -- , + [0xFF0E] = "full_width_close", -- . + [0xFF1A] = "full_width_close", -- : + [0xFF1B] = "full_width_close", -- ; + [0xFF1F] = "full_width_close", -- ? + -- + -- non starter + -- + [0x3005] = "non_starter", [0x3041] = "non_starter", [0x3043] = "non_starter", [0x3045] = "non_starter", [0x3047] = "non_starter", + [0x3049] = "non_starter", [0x3063] = "non_starter", [0x3083] = "non_starter", [0x3085] = "non_starter", [0x3087] = "non_starter", + [0x308E] = "non_starter", [0x3095] = "non_starter", [0x3096] = "non_starter", [0x309B] = "non_starter", [0x309C] = "non_starter", + [0x309D] = "non_starter", [0x309E] = "non_starter", [0x30A0] = "non_starter", [0x30A1] = "non_starter", [0x30A3] = "non_starter", + [0x30A5] = "non_starter", [0x30A7] = "non_starter", [0x30A9] = "non_starter", [0x30C3] = "non_starter", [0x30E3] = "non_starter", + [0x30E5] = "non_starter", [0x30E7] = "non_starter", [0x30EE] = "non_starter", [0x30F5] = "non_starter", [0x30F6] = "non_starter", + [0x30FC] = "non_starter", [0x30FD] = "non_starter", [0x30FE] = "non_starter", [0x31F0] = "non_starter", [0x31F1] = "non_starter", + [0x30F2] = "non_starter", [0x30F3] = "non_starter", [0x30F4] = "non_starter", [0x31F5] = "non_starter", [0x31F6] = "non_starter", + [0x30F7] = "non_starter", [0x30F8] = "non_starter", [0x30F9] = "non_starter", [0x31FA] = "non_starter", [0x31FB] = "non_starter", + [0x30FC] = "non_starter", [0x30FD] = "non_starter", [0x30FE] = "non_starter", [0x31FF] = "non_starter", + -- + -- hyphenation + -- + [0x2026] = "hyphen", -- … ellipsis + [0x2014] = "hyphen", -- — hyphen + } + + for i=0x03040,0x0309F do if not hash[i] then hash[i] = "chinese" end end + for i=0x030A0,0x030FF do if not hash[i] then hash[i] = "chinese" end end + for i=0x031F0,0x031FF do if not hash[i] then hash[i] = "chinese" end end + for i=0x03400,0x04DFF do if not hash[i] then hash[i] = "chinese" end end + for i=0x04E00,0x09FFF do if not hash[i] then hash[i] = "chinese" end end + for i=0x0F900,0x0FAFF do if not hash[i] then hash[i] = "chinese" end end + for i=0x0FF00,0x0FFEF do if not hash[i] then hash[i] = "chinese" end end + for i=0x20000,0x2A6DF do if not hash[i] then hash[i] = "chinese" end end + for i=0x2F800,0x2FA1F do if not hash[i] then hash[i] = "chinese" end end + for i=0x0AC00,0x0D7A3 do if not hash[i] then hash[i] = "korean" end end + for i=0x01100,0x0115F do if not hash[i] then hash[i] = "jamo_initial" end end + for i=0x01160,0x011A7 do if not hash[i] then hash[i] = "jamo_medial" end end + for i=0x011A8,0x011FF do if not hash[i] then hash[i] = "jamo_final" end end + + scripts.hash = hash + +end + +scripts.colors = { -- todo: just named colors + korean = "font:isol", + chinese = "font:rest", + full_width_open = "font:init", + full_width_close = "font:fina", + half_width_open = "font:init", + half_width_close = "font:fina", + hyphen = "font:medi", + non_starter = "font:isol", + jamo_initial = "font:init", + jamo_medial = "font:medi", + jamo_final = "font:fina", + +} + +scripts.number_to_kind = { + "korean", + "chinese", + "full_width_open", + "full_width_close", + "half_width_open", + "half_width_close", + "hyphen", + "non_starter", + "jamo_initial", + "jamo_medial", + "jamo_final", +} + +scripts.kind_to_number = { + korean = 1, + chinese = 2, + full_width_open = 3, + full_width_close = 4, + half_width_open = 5, + half_width_close = 6, + hyphen = 7, + non_starter = 8, + jamo_initial = 9, + jamo_medial = 10, + jamo_final = 11, +} + +local kind_to_number = scripts.kind_to_number +local number_to_kind = scripts.number_to_kind + +-- no, this time loading the lua always precedes the definitions +-- +-- storage.register("scripts/names", scripts.names, "scripts.names") +-- storage.register("scripts/numbers", scripts.numbers, "scripts.numbers") + +local handlers = scripts.handlers +local names = scripts.names +local numbers = scripts.numbers +local hash = scripts.hash +local colors = scripts.colors + +-- maybe also process + +function scripts.install(handler) + local name = handler.name + if not names[name] then + local n = #numbers + 1 + numbers[n] = name + names[name] = n + handlers[n] = handler + end + return names[name] +end + +function scripts.define(name) + tex.write(names[name] or attributes.unsetvalue) +end + +-- some time i will make a fonts.originals[id] + +local function colorize(start,stop) + for n in traverse_id(glyph,start) do + local kind = number_to_kind[has_attribute(n,prestat)] + if kind then + local ac = colors[kind] + if ac then + fcs(n,ac) + end + end + if n == stop then + break + end + end +end + +local function traced_process(head,first,last,process,a) + if start ~= last then + local f, l = first, last + logs.report("preprocess","before %s: %s",names[a] or "?",nodes.tosequence(f,l)) + process(head,first,last) + logs.report("preprocess","after %s: %s", names[a] or "?",nodes.tosequence(f,l)) + end +end + +-- eventually we might end up with more extensive parsing +-- todo: pass t[start..stop] == original +-- +-- one of the time consuming functiions: + +function scripts.preprocess(head) + local start = first_character(head) + if not start then + return head, false + else + local last_a, normal_process, lastfont, originals = nil, nil, nil, nil + local done, first, last, ok = false, nil, nil, false + while start do + local id = start.id + if id == glyph then + local a = has_attribute(start,preproc) + if a then + if a ~= last_a then + if first then + if ok then + if trace_analyzing then + colorize(first,last) + end + if trace_injections then + traced_process(head,first,last,normal_process,last_a) + else + normal_process(head,first,last) + end + ok, done = false, true + end + first, last = nil, nil + end + last_a = a + local handler = handlers[a] + normal_process = handler.process + end + if normal_process then + local f = start.font + if f ~= lastfont then + originals = fontdata[f].originals + lastfont = f + end + local c = start.char + if originals then c = originals[c] or c end + local h = hash[c] + if h then + set_attribute(start,prestat,kind_to_number[h]) + if not first then + first, last = start, start + else + last = start + end + -- if cjk == "chinese" or cjk == "korean" then -- we need to prevent too much ( ) processing + ok = true + -- end + elseif first then + if ok then + if trace_analyzing then + colorize(first,last) + end + if trace_injections then + traced_process(head,first,last,normal_process,last_a) + else + normal_process(head,first,last) + end + ok, done = false, true + end + first, last = nil, nil + end + end + elseif first then + if ok then + if trace_analyzing then + colorize(first,last) + end + if trace_injections then + traced_process(head,first,last,normal_process,last_a) + else + normal_process(head,first,last) + end + ok, done = false, true + end + first, last = nil, nil + end + elseif id == glue then + if ok then + -- continue + elseif first then + -- no chinese or korean + first, last = nil, nil + end + elseif first then + if ok then + -- some chinese or korean + if trace_analyzing then + colorize(first,last) + end + if trace_injections then + traced_process(head,first,last,normal_process,last_a) + else + normal_process(head,first,last) + end + first, last, ok, done = nil, nil, false, true + elseif first then + first, last = nil, nil + end + end + start = start.next + end + if ok then + if trace_analyzing then + colorize(first,last) + end + if trace_injections then + traced_process(head,first,last,normal_process,last_a) + else + normal_process(head,first,last) + end + done = true + end + return head, done + end +end -- cgit v1.2.3