diff options
author | Hans Hagen <pragma@wxs.nl> | 2008-08-04 15:59:00 +0200 |
---|---|---|
committer | Hans Hagen <pragma@wxs.nl> | 2008-08-04 15:59:00 +0200 |
commit | f8ba0550d77fd6e2b307ff9dd3175fc0c613b8e2 (patch) | |
tree | ae27ca6edd0b2f1bcbe315d241b8152107d4e6a3 /tex/context/base/char-ini.lua | |
parent | 1d63a6eae86a6b78d4563ed60521449e4bf89f3c (diff) | |
download | context-f8ba0550d77fd6e2b307ff9dd3175fc0c613b8e2.tar.gz |
stable 2008.08.04 15:59
Diffstat (limited to 'tex/context/base/char-ini.lua')
-rw-r--r-- | tex/context/base/char-ini.lua | 387 |
1 files changed, 295 insertions, 92 deletions
diff --git a/tex/context/base/char-ini.lua b/tex/context/base/char-ini.lua index f44eb8aca..de3266b73 100644 --- a/tex/context/base/char-ini.lua +++ b/tex/context/base/char-ini.lua @@ -6,10 +6,10 @@ if not modules then modules = { } end modules ['char-ini'] = { license = "see context related readme files" } -utf = utf or unicode.utf tex = tex or { } +xml = xml or { } -local format = string.format +local format, texsprint, utfchar, utfbyte = string.format, tex.sprint, unicode.utf8.char, unicode.utf8.byte --[[ldx-- <p>This module implements some methods and creates additional datastructured @@ -22,6 +22,199 @@ characters.data = characters.data or { } characters.synonyms = characters.synonyms or { } characters.context = characters.context or { } +characters.blocks={ + ["aegeannumbers"] = { 0x10100, 0x1013F, "Aegean Numbers" }, + ["alphabeticpresentationforms"] = { 0xFB00, 0xFB4F, "Alphabetic Presentation Forms" }, + ["ancientgreekmusicalnotation"] = { 0x1D200, 0x1D24F, "Ancient Greek Musical Notation" }, + ["ancientgreeknumbers"] = { 0x10140, 0x1018F, "Ancient Greek Numbers" }, + ["ancientsymbols"] = { 0x10190, 0x101CF, "Ancient Symbols" }, + ["arabic"] = { 0x0600, 0x06FF, "Arabic" }, + ["arabicpresentationformsa"] = { 0xFB50, 0xFDFF, "Arabic Presentation Forms-A" }, + ["arabicpresentationformsb"] = { 0xFE70, 0xFEFF, "Arabic Presentation Forms-B" }, + ["arabicsupplement"] = { 0x0750, 0x077F, "Arabic Supplement" }, + ["armenian"] = { 0x0530, 0x058F, "Armenian" }, + ["arrows"] = { 0x2190, 0x21FF, "Arrows" }, + ["balinese"] = { 0x1B00, 0x1B7F, "Balinese" }, + ["basiclatin"] = { 0x0000, 0x007F, "Basic Latin" }, + ["bengali"] = { 0x0980, 0x09FF, "Bengali" }, + ["blockelements"] = { 0x2580, 0x259F, "Block Elements" }, + ["bopomofo"] = { 0x3100, 0x312F, "Bopomofo" }, + ["bopomofoextended"] = { 0x31A0, 0x31BF, "Bopomofo Extended" }, + ["boxdrawing"] = { 0x2500, 0x257F, "Box Drawing" }, + ["braillepatterns"] = { 0x2800, 0x28FF, "Braille Patterns" }, + ["buginese"] = { 0x1A00, 0x1A1F, "Buginese" }, + ["buhid"] = { 0x1740, 0x175F, "Buhid" }, + ["byzantinemusicalsymbols"] = { 0x1D000, 0x1D0FF, "Byzantine Musical Symbols" }, + ["carian"] = { 0x102A0, 0x102DF, "Carian" }, + ["cham"] = { 0xAA00, 0xAA5F, "Cham" }, + ["cherokee"] = { 0x13A0, 0x13FF, "Cherokee" }, + ["cjkcompatibility"] = { 0x3300, 0x33FF, "CJK Compatibility" }, + ["cjkcompatibilityforms"] = { 0xFE30, 0xFE4F, "CJK Compatibility Forms" }, + ["cjkcompatibilityideographs"] = { 0xF900, 0xFAFF, "CJK Compatibility Ideographs" }, + ["cjkcompatibilityideographssupplement"] = { 0x2F800, 0x2FA1F, "CJK Compatibility Ideographs Supplement" }, + ["cjkradicalssupplement"] = { 0x2E80, 0x2EFF, "CJK Radicals Supplement" }, + ["cjkstrokes"] = { 0x31C0, 0x31EF, "CJK Strokes" }, + ["cjksymbolsandpunctuation"] = { 0x3000, 0x303F, "CJK Symbols and Punctuation" }, + ["cjkunifiedideographs"] = { 0x4E00, 0x9FFF, "CJK Unified Ideographs" }, + ["cjkunifiedideographsextensiona"] = { 0x3400, 0x4DBF, "CJK Unified Ideographs Extension A" }, + ["cjkunifiedideographsextensionb"] = { 0x20000, 0x2A6DF, "CJK Unified Ideographs Extension B" }, + ["combiningdiacriticalmarks"] = { 0x0300, 0x036F, "Combining Diacritical Marks" }, + ["combiningdiacriticalmarksforsymbols"] = { 0x20D0, 0x20FF, "Combining Diacritical Marks for Symbols" }, + ["combiningdiacriticalmarkssupplement"] = { 0x1DC0, 0x1DFF, "Combining Diacritical Marks Supplement" }, + ["combininghalfmarks"] = { 0xFE20, 0xFE2F, "Combining Half Marks" }, + ["controlpictures"] = { 0x2400, 0x243F, "Control Pictures" }, + ["coptic"] = { 0x2C80, 0x2CFF, "Coptic" }, + ["countingrodnumerals"] = { 0x1D360, 0x1D37F, "Counting Rod Numerals" }, + ["cuneiform"] = { 0x12000, 0x123FF, "Cuneiform" }, + ["cuneiformnumbersandpunctuation"] = { 0x12400, 0x1247F, "Cuneiform Numbers and Punctuation" }, + ["currencysymbols"] = { 0x20A0, 0x20CF, "Currency Symbols" }, + ["cypriotsyllabary"] = { 0x10800, 0x1083F, "Cypriot Syllabary" }, + ["cyrillic"] = { 0x0400, 0x04FF, "Cyrillic" }, + ["cyrillicextendeda"] = { 0x2DE0, 0x2DFF, "Cyrillic Extended-A" }, + ["cyrillicextendedb"] = { 0xA640, 0xA69F, "Cyrillic Extended-B" }, + ["cyrillicsupplement"] = { 0x0500, 0x052F, "Cyrillic Supplement" }, + ["deseret"] = { 0x10400, 0x1044F, "Deseret" }, + ["devanagari"] = { 0x0900, 0x097F, "Devanagari" }, + ["dingbats"] = { 0x2700, 0x27BF, "Dingbats" }, + ["dominotiles"] = { 0x1F030, 0x1F09F, "Domino Tiles" }, + ["enclosedalphanumerics"] = { 0x2460, 0x24FF, "Enclosed Alphanumerics" }, + ["enclosedcjklettersandmonths"] = { 0x3200, 0x32FF, "Enclosed CJK Letters and Months" }, + ["ethiopic"] = { 0x1200, 0x137F, "Ethiopic" }, + ["ethiopicextended"] = { 0x2D80, 0x2DDF, "Ethiopic Extended" }, + ["ethiopicsupplement"] = { 0x1380, 0x139F, "Ethiopic Supplement" }, + ["generalpunctuation"] = { 0x2000, 0x206F, "General Punctuation" }, + ["geometricshapes"] = { 0x25A0, 0x25FF, "Geometric Shapes" }, + ["georgian"] = { 0x10A0, 0x10FF, "Georgian" }, + ["georgiansupplement"] = { 0x2D00, 0x2D2F, "Georgian Supplement" }, + ["glagolitic"] = { 0x2C00, 0x2C5F, "Glagolitic" }, + ["gothic"] = { 0x10330, 0x1034F, "Gothic" }, + ["greekandcoptic"] = { 0x0370, 0x03FF, "Greek and Coptic" }, + ["greekextended"] = { 0x1F00, 0x1FFF, "Greek Extended" }, + ["gujarati"] = { 0x0A80, 0x0AFF, "Gujarati" }, + ["gurmukhi"] = { 0x0A00, 0x0A7F, "Gurmukhi" }, + ["halfwidthandfullwidthforms"] = { 0xFF00, 0xFFEF, "Halfwidth and Fullwidth Forms" }, + ["hangulcompatibilityjamo"] = { 0x3130, 0x318F, "Hangul Compatibility Jamo" }, + ["hanguljamo"] = { 0x1100, 0x11FF, "Hangul Jamo" }, + ["hangulsyllables"] = { 0xAC00, 0xD7AF, "Hangul Syllables" }, + ["hanunoo"] = { 0x1720, 0x173F, "Hanunoo" }, + ["hebrew"] = { 0x0590, 0x05FF, "Hebrew" }, + ["highprivateusesurrogates"] = { 0xDB80, 0xDBFF, "High Private Use Surrogates" }, + ["highsurrogates"] = { 0xD800, 0xDB7F, "High Surrogates" }, + ["hiragana"] = { 0x3040, 0x309F, "Hiragana" }, + ["ideographicdescriptioncharacters"] = { 0x2FF0, 0x2FFF, "Ideographic Description Characters" }, + ["ipaextensions"] = { 0x0250, 0x02AF, "IPA Extensions" }, + ["kanbun"] = { 0x3190, 0x319F, "Kanbun" }, + ["kangxiradicals"] = { 0x2F00, 0x2FDF, "Kangxi Radicals" }, + ["kannada"] = { 0x0C80, 0x0CFF, "Kannada" }, + ["katakana"] = { 0x30A0, 0x30FF, "Katakana" }, + ["katakanaphoneticextensions"] = { 0x31F0, 0x31FF, "Katakana Phonetic Extensions" }, + ["kayahli"] = { 0xA900, 0xA92F, "Kayah Li" }, + ["kharoshthi"] = { 0x10A00, 0x10A5F, "Kharoshthi" }, + ["khmer"] = { 0x1780, 0x17FF, "Khmer" }, + ["khmersymbols"] = { 0x19E0, 0x19FF, "Khmer Symbols" }, + ["lao"] = { 0x0E80, 0x0EFF, "Lao" }, + ["latinextendeda"] = { 0x0100, 0x017F, "Latin Extended-A" }, + ["latinextendedadditional"] = { 0x1E00, 0x1EFF, "Latin Extended Additional" }, + ["latinextendedb"] = { 0x0180, 0x024F, "Latin Extended-B" }, + ["latinextendedc"] = { 0x2C60, 0x2C7F, "Latin Extended-C" }, + ["latinextendedd"] = { 0xA720, 0xA7FF, "Latin Extended-D" }, + ["latinsupplement"] = { 0x0080, 0x00FF, "Latin-1 Supplement" }, + ["lepcha"] = { 0x1C00, 0x1C4F, "Lepcha" }, + ["letterlikesymbols"] = { 0x2100, 0x214F, "Letterlike Symbols" }, + ["limbu"] = { 0x1900, 0x194F, "Limbu" }, + ["linearbideograms"] = { 0x10080, 0x100FF, "Linear B Ideograms" }, + ["linearbsyllabary"] = { 0x10000, 0x1007F, "Linear B Syllabary" }, + ["lowsurrogates"] = { 0xDC00, 0xDFFF, "Low Surrogates" }, + ["lycian"] = { 0x10280, 0x1029F, "Lycian" }, + ["lydian"] = { 0x10920, 0x1093F, "Lydian" }, + ["mahjongtiles"] = { 0x1F000, 0x1F02F, "Mahjong Tiles" }, + ["malayalam"] = { 0x0D00, 0x0D7F, "Malayalam" }, + ["mathematicalalphanumericsymbols"] = { 0x1D400, 0x1D7FF, "Mathematical Alphanumeric Symbols" }, + ["mathematicaloperators"] = { 0x2200, 0x22FF, "Mathematical Operators" }, + ["miscellaneousmathematicalsymbolsa"] = { 0x27C0, 0x27EF, "Miscellaneous Mathematical Symbols-A" }, + ["miscellaneousmathematicalsymbolsb"] = { 0x2980, 0x29FF, "Miscellaneous Mathematical Symbols-B" }, + ["miscellaneoussymbols"] = { 0x2600, 0x26FF, "Miscellaneous Symbols" }, + ["miscellaneoussymbolsandarrows"] = { 0x2B00, 0x2BFF, "Miscellaneous Symbols and Arrows" }, + ["miscellaneoustechnical"] = { 0x2300, 0x23FF, "Miscellaneous Technical" }, + ["modifiertoneletters"] = { 0xA700, 0xA71F, "Modifier Tone Letters" }, + ["mongolian"] = { 0x1800, 0x18AF, "Mongolian" }, + ["musicalsymbols"] = { 0x1D100, 0x1D1FF, "Musical Symbols" }, + ["myanmar"] = { 0x1000, 0x109F, "Myanmar" }, + ["newtailue"] = { 0x1980, 0x19DF, "New Tai Lue" }, + ["nko"] = { 0x07C0, 0x07FF, "NKo" }, + ["numberforms"] = { 0x2150, 0x218F, "Number Forms" }, + ["ogham"] = { 0x1680, 0x169F, "Ogham" }, + ["olchiki"] = { 0x1C50, 0x1C7F, "Ol Chiki" }, + ["olditalic"] = { 0x10300, 0x1032F, "Old Italic" }, + ["oldpersian"] = { 0x103A0, 0x103DF, "Old Persian" }, + ["opticalcharacterrecognition"] = { 0x2440, 0x245F, "Optical Character Recognition" }, + ["oriya"] = { 0x0B00, 0x0B7F, "Oriya" }, + ["osmanya"] = { 0x10480, 0x104AF, "Osmanya" }, + ["phagspa"] = { 0xA840, 0xA87F, "Phags-pa" }, + ["phaistosdisc"] = { 0x101D0, 0x101FF, "Phaistos Disc" }, + ["phoenician"] = { 0x10900, 0x1091F, "Phoenician" }, + ["phoneticextensions"] = { 0x1D00, 0x1D7F, "Phonetic Extensions" }, + ["phoneticextensionssupplement"] = { 0x1D80, 0x1DBF, "Phonetic Extensions Supplement" }, + ["privateusearea"] = { 0xE000, 0xF8FF, "Private Use Area" }, + ["rejang"] = { 0xA930, 0xA95F, "Rejang" }, + ["runic"] = { 0x16A0, 0x16FF, "Runic" }, + ["saurashtra"] = { 0xA880, 0xA8DF, "Saurashtra" }, + ["shavian"] = { 0x10450, 0x1047F, "Shavian" }, + ["sinhala"] = { 0x0D80, 0x0DFF, "Sinhala" }, + ["smallformvariants"] = { 0xFE50, 0xFE6F, "Small Form Variants" }, + ["spacingmodifierletters"] = { 0x02B0, 0x02FF, "Spacing Modifier Letters" }, + ["specials"] = { 0xFFF0, 0xFFFF, "Specials" }, + ["sundanese"] = { 0x1B80, 0x1BBF, "Sundanese" }, + ["superscriptsandsubscripts"] = { 0x2070, 0x209F, "Superscripts and Subscripts" }, + ["supplementalarrowsa"] = { 0x27F0, 0x27FF, "Supplemental Arrows-A" }, + ["supplementalarrowsb"] = { 0x2900, 0x297F, "Supplemental Arrows-B" }, + ["supplementalmathematicaloperators"] = { 0x2A00, 0x2AFF, "Supplemental Mathematical Operators" }, + ["supplementalpunctuation"] = { 0x2E00, 0x2E7F, "Supplemental Punctuation" }, + ["supplementaryprivateuseareaa"] = { 0xF0000, 0xFFFFF, "Supplementary Private Use Area-A" }, + ["supplementaryprivateuseareab"] = { 0x100000, 0x10FFFF, "Supplementary Private Use Area-B" }, + ["sylotinagri"] = { 0xA800, 0xA82F, "Syloti Nagri" }, + ["syriac"] = { 0x0700, 0x074F, "Syriac" }, + ["tagalog"] = { 0x1700, 0x171F, "Tagalog" }, + ["tagbanwa"] = { 0x1760, 0x177F, "Tagbanwa" }, + ["tags"] = { 0xE0000, 0xE007F, "Tags" }, + ["taile"] = { 0x1950, 0x197F, "Tai Le" }, + ["taixuanjingsymbols"] = { 0x1D300, 0x1D35F, "Tai Xuan Jing Symbols" }, + ["tamil"] = { 0x0B80, 0x0BFF, "Tamil" }, + ["telugu"] = { 0x0C00, 0x0C7F, "Telugu" }, + ["thaana"] = { 0x0780, 0x07BF, "Thaana" }, + ["thai"] = { 0x0E00, 0x0E7F, "Thai" }, + ["tibetan"] = { 0x0F00, 0x0FFF, "Tibetan" }, + ["tifinagh"] = { 0x2D30, 0x2D7F, "Tifinagh" }, + ["ugaritic"] = { 0x10380, 0x1039F, "Ugaritic" }, + ["unifiedcanadianaboriginalsyllabics"] = { 0x1400, 0x167F, "Unified Canadian Aboriginal Syllabics" }, + ["vai"] = { 0xA500, 0xA63F, "Vai" }, + ["variationselectors"] = { 0xFE00, 0xFE0F, "Variation Selectors" }, + ["variationselectorssupplement"] = { 0xE0100, 0xE01EF, "Variation Selectors Supplement" }, + ["verticalforms"] = { 0xFE10, 0xFE1F, "Vertical Forms" }, + ["yijinghexagramsymbols"] = { 0x4DC0, 0x4DFF, "Yijing Hexagram Symbols" }, + ["yiradicals"] = { 0xA490, 0xA4CF, "Yi Radicals" }, + ["yisyllables"] = { 0xA000, 0xA48F, "Yi Syllables" }, +} + +function characters.getrange(name) + local tag = name:lower() + tag = name:gsub("[^a-z]", "") + local range = characters.blocks[tag] + if range then + return range[1], range[2] + end + name = name:gsub('"',"0x") -- goodie: tex hex notation + local start, stop = name:match("^(.-)[%-%:](.-)$") + if start and stop then + start, stop = tonumber(start,16) or tonumber(start), tonumber(stop,16) or tonumber(stop) + if start and stop then + return start, stop + end + end + local slot = tonumber(name,16) or tonumber(name) + return slot, slot +end + characters.categories = { lu = "Letter Uppercase", ll = "Letter Lowercase", @@ -66,7 +259,7 @@ characters.is_command = table.tohash { "cf","zs" } --- linebreak: +-- linebreak: todo: hash -- -- normative : BK CR LF CM SG GL CB SP ZW NL WJ JL JV JT H2 H3 -- informative : XX OP CL QU NS EX SY IS PR PO NU AL ID IN HY BB BA SA AI B2 @@ -75,12 +268,32 @@ characters.is_command = table.tohash { -- -- N A H W F Na -do - local _empty_table_ = { __index = function(t,k) return "" end } +characters.bidi = { + l = "Left-to-Right", + lre = "Left-to-Right Embedding", + lro = "Left-to-Right Override", + r = "Right-to-Left", + al = "Right-to-Left Arabic", + rle = "Right-to-Left Embedding", + rlo = "Right-to-Left Override", + pdf = "Pop Directional Format", + en = "European Number", + es = "European Number Separator", + et = "European Number Terminator", + an = "Arabic Number", + cs = "Common Number Separator", + nsm = "Non-Spacing Mark", + bn = "Boundary Neutral", + b = "Paragraph Separator", + s = "Segment Separator", + ws = "Whitespace", + on = "Other Neutrals", +} - function table.set_empty_metatable(t) - setmetatable(t,_empty_table_) - end +local _empty_table_ = { __index = function(t,k) return "" end } + +function table.set_empty_metatable(t) + setmetatable(t,_empty_table_) end table.set_empty_metatable(characters.data) @@ -92,13 +305,14 @@ table we derive a few more.</p> -- used ? -characters.context.unicodes = characters.context.unicodes or { } -characters.context.utfcodes = characters.context.utfcodes or { } -characters.context.enccodes = characters.context.enccodes or { } -characters.context.fallbacks = characters.context.fallbacks or { } +characters.unicodes = characters.unicodes or { } +characters.utfcodes = characters.utfcodes or { } +characters.enccodes = characters.enccodes or { } +characters.fallbacks = characters.fallbacks or { } +characters.directions = characters.directions or { } function characters.context.rehash() - local unicodes, utfcodes, enccodes, fallbacks, utfchar = characters.context.unicodes, characters.context.utfcodes, characters.context.enccodes, characters.context.fallbacks, utf.char + local unicodes, utfcodes, enccodes, fallbacks, directions = characters.unicodes, characters.utfcodes, characters.enccodes, characters.fallbacks, characters.directions for k,v in pairs(characters.data) do local contextname, adobename, specials = v.contextname, v.adobename, v.specials if contextname then @@ -115,12 +329,21 @@ function characters.context.rehash() fallbacks[k] = s fallbacks[s] = k end + directions[k] = v.direction end for name,code in pairs(characters.synonyms) do if not enccodes[name] then enccodes[name] = code end end end +-- maybe some day, no significate speed up now + +--~ input.storage.register(false, "characters.unicodes", characters.unicodes, "characters.unicodes") +--~ input.storage.register(false, "characters.utfcodes", characters.utfcodes, "characters.utfcodes") +--~ input.storage.register(false, "characters.enccodes", characters.enccodes, "characters.enccodes") +--~ input.storage.register(false, "characters.fallbacks", characters.fallbacks, "characters.fallbacks") +--~ input.storage.register(false, "characters.directions", characters.directions, "characters.directions") + --[[ldx-- <p>The <type>context</type> namespace is used to store methods and data which is rather specific to <l n='context'/>.</p> @@ -131,9 +354,9 @@ function characters.context.show(n) local d = characters.data[n] if d then local function entry(label,name) - tex.sprint(tex.ctxcatcodes,format("\\NC %s\\NC %s\\NC\\NR",label,characters.valid(d[name]))) + texsprint(tex.ctxcatcodes,format("\\NC %s\\NC %s\\NC\\NR",label,characters.valid(d[name]))) end - tex.sprint(tex.ctxcatcodes,"\\starttabulate[|Tl|Tl|]") + texsprint(tex.ctxcatcodes,"\\starttabulate[|Tl|Tl|]") entry("unicode index" , "unicodeslot") entry("context name" , "contextname") entry("adobe name" , "adobename") @@ -142,7 +365,7 @@ function characters.context.show(n) entry("uppercase code", "uccode") entry("lowercase code", "lccode") entry("specials" , "specials") - tex.sprint(tex.ctxcatcodes,"\\stoptabulate ") + texsprint(tex.ctxcatcodes,"\\stoptabulate ") end end @@ -151,57 +374,40 @@ end use the table. After all, we have this information available anyway.</p> --ldx]]-- -function characters.makeactive(n,name) - tex.sprint(tex.ctxcatcodes,format("\\catcode%s=13\\unexpanded\\def %s{\\%s}",n,utf.char(n),name)) +function characters.makeactive(n,name) -- let ? + texsprint(tex.ctxcatcodes,format("\\catcode%s=13\\unexpanded\\def %s{\\%s}",n,utfchar(n),name)) end function tex.uprint(n) - tex.sprint(tex.ctxcatcodes,utf.char(n)) + texsprint(tex.ctxcatcodes,utfchar(n)) end ---~ function characters.context.define() ---~ local unicodes, utfcodes = characters.context.unicodes, characters.context.utfcodes ---~ local flush, tc, char = tex.sprint, tex.ctxcatcodes, utf.char ---~ local is_character, is_command = characters.is_character, characters.is_command ---~ for u, chr in pairs(characters.data) do ---~ local contextname = chr.contextname ---~ if contextname then ---~ local category = chr.category ---~ if is_character[category] then ---~ -- by this time, we're still in normal catcode mode ---~ if chr.unicodeslot < 128 then ---~ flush(tc, "\\chardef\\" .. contextname .. "=" .. u) -- unicodes[contextname]) ---~ else ---~ flush(tc, "\\let\\" .. contextname .. "=" .. char(u)) -- utfcodes[contextname]) ---~ end ---~ elseif is_command[category] then ---~ flush("\\catcode"..u.."=13\\unexpanded\\def "..char(u).."{\\"..contextname.."}") ---~ -- characters.makeactive(u,contextname) ---~ end ---~ end ---~ end ---~ end - characters.activated = { } function characters.context.define() - local unicodes, utfcodes = characters.context.unicodes, characters.context.utfcodes - local flush, tc, char = tex.sprint, tex.ctxcatcodes, utf.char + local unicodes, utfcodes = characters.unicodes, characters.utfcodes + local tc = tex.ctxcatcodes local is_character, is_command = characters.is_character, characters.is_command for u, chr in pairs(characters.data) do - local contextname = chr.contextname - if contextname then - local category = chr.category - if is_character[category] then - -- by this time, we're still in normal catcode mode - if chr.unicodeslot < 128 then - flush(tc, "\\chardef\\" .. contextname .. "=" .. u) -- unicodes[contextname]) - else - flush(tc, "\\let\\" .. contextname .. "=" .. char(u)) -- utfcodes[contextname]) - end - elseif is_command[category] then - flush("{\\catcode"..u.."=13\\unexpanded\\gdef "..char(u).."{\\"..contextname.."}}") - characters.activated[u] = true + local fallback = chr.fallback + if fallback then + texsprint("{\\catcode"..u.."=13\\unexpanded\\gdef "..utfchar(u).."{\\checkedchar{"..u.."}{"..fallback.."}}}") + characters.activated[u] = true + else + local contextname = chr.contextname + if contextname then + local category = chr.category + if is_character[category] then + -- by this time, we're still in normal catcode mode + if chr.unicodeslot < 128 then + texsprint(tc, "\\chardef\\" .. contextname .. "=" .. u) -- unicodes[contextname]) + else + texsprint(tc, "\\let\\" .. contextname .. "=" .. utfchar(u)) -- utfcodes[contextname]) + end + elseif is_command[category] then + texsprint("{\\catcode"..u.."=13\\unexpanded\\gdef "..utfchar(u).."{\\"..contextname.."}}") + characters.activated[u] = true + end end end end @@ -209,14 +415,14 @@ end function characters.context.activate() for u,_ in pairs(characters.activated) do - tex.sprint(tex.ctxcatcodes,"\\catcode "..u.."=13 ") + texsprint(tex.ctxcatcodes,"\\catcode "..u.."=13 ") end end function characters.charcode(box) local b = tex.box[box] local l = b.list - tex.sprint((l and l.id == node.id('glyph') and l.char) or 0) + texsprint((l and l.id == node.id('glyph') and l.char) or 0) end --[[ldx-- @@ -224,14 +430,14 @@ end --ldx]]-- function characters.setcodes() - local flush, tc, format = tex.sprint, tex.ctxcatcodes, string.format + local tc = tex.ctxcatcodes for code, chr in pairs(characters.data) do local cc = chr.category if cc == 'll' or cc == 'lu' or cc == 'lt' then local lc, uc = chr.lccode, chr.uccode if not lc then chr.lccode, lc = code, code end if not uc then chr.uccode, uc = code, code end - flush(tc, format("\\setcclcuc %i %i %i ",code,lc,uc)) + texsprint(tc, format("\\setcclcuc %i %i %i ",code,lc,uc)) end end end @@ -325,9 +531,9 @@ function characters.lccode(n) return characters.data[n].lccode or n end function characters.flush(n) local c = characters.data[n] if c and c.contextname then - tex.sprint(tex.texcatcodes, "\\"..c.contextname) + texsprint(tex.texcatcodes, "\\"..c.contextname) else - tex.sprint(unicode.utf8.char(n)) + texsprint(unicode.utf8.char(n)) end end @@ -348,7 +554,7 @@ end function characters.is_of_category(token,category) if type(token) == "string" then - return characters.data[utf.byte(token)].category == category + return characters.data[utfbyte(token)].category == category else return characters.data[token].category == category end @@ -360,7 +566,7 @@ function characters.i_is_of_category(i,category) -- by index (number) end function characters.n_is_of_category(n,category) -- by name (string) - local cd = characters.data[utf.byte(n)] + local cd = characters.data[utfbyte(n)] return cd and cd.category == category end @@ -370,39 +576,36 @@ unicode reference tables.</p> --ldx]]-- function characters.setpdfunicodes() ---~ local flush, tc, sf = tex.sprint, tex.ctxcatcodes, string.format +--~ local tc = tex.ctxcatcodes --~ for _,v in pairs(characters.data) do --~ if v.adobename then ---~ flush(tc,sf("\\pdfglyphtounicode{%s}{%04X}", v.adobename, v.unicodeslot)) +--~ texsprint(tc,format("\\pdfglyphtounicode{%s}{%04X}", v.adobename, v.unicodeslot)) --~ end --~ end end ---[[ldx-- -<p>The next method generates a table for good old <l n='pdftex'/>.</p> +-- xml support -<typing> -characters.pdftex.make_pdf_to_unicodetable("pdfr-def.tex") -</typing> ---ldx]]-- +characters.active_offset = 0x10000 -characters.pdftex = characters.pdftex or { } - -function characters.pdftex.make_pdf_to_unicodetable(filename) ---~ f = io.open(filename,'w') ---~ if f then ---~ f:write("% This file is generated with Luatex using the\n") ---~ f:write("% character tables that come with ConTeXt MkIV.\n") ---~ f:write("%\n") ---~ f:write("\\ifx\\pdfglyphtounicode\\undefined\\endinput\\fi\n") -- just to be sure ---~ for _, v in pairs(characters.data) do ---~ if v.adobename then ---~ f:write(format("\\pdfglyphtounicode{%s}{%04X}", v.adobename, v.unicodeslot)) ---~ end ---~ end ---~ f:write("%\n") ---~ f:write("%\n") ---~ f:write("\\endinput") ---~ f:close() ---~ end +xml.entities = xml.entities or { } + +input.storage.register(false,"xml/entities",xml.entities,"xml.entities") -- this will move to lxml + +function characters.remapentity(chr,slot) + texsprint(format("{\\catcode%s=13\\xdef%s{\\string%s}}",slot,utfchar(slot),chr)) +end + +function characters.setmkiventities() + local entities = xml.entities + entities.lt = "<" + entities.amp = "&" + entities.gt = ">" +end + +function characters.setmkiientities() + local entities = xml.entities + entities.lt = utfchar(characters.active_offset + utfbyte("<")) + entities.amp = utfchar(characters.active_offset + utfbyte("&")) + entities.gt = utfchar(characters.active_offset + utfbyte(">")) end |