diff options
| author | Hans Hagen <pragma@wxs.nl> | 2009-05-28 11:23:00 +0200 | 
|---|---|---|
| committer | Hans Hagen <pragma@wxs.nl> | 2009-05-28 11:23:00 +0200 | 
| commit | 1d3090326210c6e6f7ec5432799ded25b75bba46 (patch) | |
| tree | c5921203789ec669e6bccaba4bd56f9c072dc56b /tex/context/base/char-ini.lua | |
| parent | 94d83f84758766511c5e324721e39fea6ab71dae (diff) | |
| download | context-1d3090326210c6e6f7ec5432799ded25b75bba46.tar.gz | |
beta 2009.05.28 11:23
Diffstat (limited to 'tex/context/base/char-ini.lua')
| -rw-r--r-- | tex/context/base/char-ini.lua | 619 | 
1 files changed, 314 insertions, 305 deletions
| diff --git a/tex/context/base/char-ini.lua b/tex/context/base/char-ini.lua index 0dd7a266a..974366b7a 100644 --- a/tex/context/base/char-ini.lua +++ b/tex/context/base/char-ini.lua @@ -9,202 +9,238 @@ if not modules then modules = { } end modules ['char-ini'] = {  tex = tex or { }  xml = xml or { } -local format, texsprint, utfchar, utfbyte, concat = string.format, tex.sprint, unicode.utf8.char, unicode.utf8.byte, table.concat +local utf = unicode.utf8 + +local utfchar, utfbyte = utf.char, utf.byte +local concat = table.concat +local next, tonumber = next, tonumber +local texsprint, texprint = tex.sprint, tex.print +local format, lower, gsub, match, gmatch = string.format, string.lower, string.gsub, string.match, string.match, string.gmatch + +local ctxcatcodes = tex.ctxcatcodes +local texcatcodes = tex.texcatcodes  --[[ldx--  <p>This module implements some methods and creates additional datastructured  from the big character table that we use for all kind of purposes:  <type>char-def.lua</type>.</p> + +<p>We assume that at this point <type>characters.data</type> is already +loaded!</p>  --ldx]]-- -characters          = characters          or { } -characters.data     = characters.data     or { } -characters.synonyms = characters.synonyms or { } -characters.context  = characters.context  or { } - -characters.blocks={ -    ["aegeannumbers"] = { 0x10100, 0x1013F, "Aegean Numbers" }, -    ["alphabeticpresentationforms"] = { 0xFB00, 0xFB4F, "Alphabetic Presentation Forms" }, -    ["ancientgreekmusicalnotation"] = { 0x1D200, 0x1D24F, "Ancient Greek Musical Notation" }, -    ["ancientgreeknumbers"] = { 0x10140, 0x1018F, "Ancient Greek Numbers" }, -    ["ancientsymbols"] = { 0x10190, 0x101CF, "Ancient Symbols" }, -    ["arabic"] = { 0x0600, 0x06FF, "Arabic" }, -    ["arabicpresentationformsa"] = { 0xFB50, 0xFDFF, "Arabic Presentation Forms-A" }, -    ["arabicpresentationformsb"] = { 0xFE70, 0xFEFF, "Arabic Presentation Forms-B" }, -    ["arabicsupplement"] = { 0x0750, 0x077F, "Arabic Supplement" }, -    ["armenian"] = { 0x0530, 0x058F, "Armenian" }, -    ["arrows"] = { 0x2190, 0x21FF, "Arrows" }, -    ["balinese"] = { 0x1B00, 0x1B7F, "Balinese" }, -    ["basiclatin"] = { 0x0000, 0x007F, "Basic Latin" }, -    ["bengali"] = { 0x0980, 0x09FF, "Bengali" }, -    ["blockelements"] = { 0x2580, 0x259F, "Block Elements" }, -    ["bopomofo"] = { 0x3100, 0x312F, "Bopomofo" }, -    ["bopomofoextended"] = { 0x31A0, 0x31BF, "Bopomofo Extended" }, -    ["boxdrawing"] = { 0x2500, 0x257F, "Box Drawing" }, -    ["braillepatterns"] = { 0x2800, 0x28FF, "Braille Patterns" }, -    ["buginese"] = { 0x1A00, 0x1A1F, "Buginese" }, -    ["buhid"] = { 0x1740, 0x175F, "Buhid" }, -    ["byzantinemusicalsymbols"] = { 0x1D000, 0x1D0FF, "Byzantine Musical Symbols" }, -    ["carian"] = { 0x102A0, 0x102DF, "Carian" }, -    ["cham"] = { 0xAA00, 0xAA5F, "Cham" }, -    ["cherokee"] = { 0x13A0, 0x13FF, "Cherokee" }, -    ["cjkcompatibility"] = { 0x3300, 0x33FF, "CJK Compatibility" }, -    ["cjkcompatibilityforms"] = { 0xFE30, 0xFE4F, "CJK Compatibility Forms" }, -    ["cjkcompatibilityideographs"] = { 0xF900, 0xFAFF, "CJK Compatibility Ideographs" }, +characters      = characters      or { } +characters.data = characters.data or { } + +local data = characters.data + +if not characters.ranges then +    characters.ranges = { } +    for k, v in next, data do +        characters.ranges[#characters.ranges+1] = k +    end +end + +storage.register("characters/ranges",characters.ranges,"characters.ranges") + +local ranges = characters.ranges + +setmetatable(data, { +    __index = function(t,k) +        for r=1,#ranges do +            local rr = ranges[r] -- first in range +            if k > rr and k <= data[rr].range then +                t[k] = t[rr] +                return t[k] +            end +        end +        return nil +    end +}) + +characters.blocks = { +    ["aegeannumbers"]                        = { 0x10100, 0x1013F, "Aegean Numbers" }, +    ["alphabeticpresentationforms"]          = { 0x0FB00, 0x0FB4F, "Alphabetic Presentation Forms" }, +    ["ancientgreekmusicalnotation"]          = { 0x1D200, 0x1D24F, "Ancient Greek Musical Notation" }, +    ["ancientgreeknumbers"]                  = { 0x10140, 0x1018F, "Ancient Greek Numbers" }, +    ["ancientsymbols"]                       = { 0x10190, 0x101CF, "Ancient Symbols" }, +    ["arabic"]                               = { 0x00600, 0x006FF, "Arabic" }, +    ["arabicpresentationformsa"]             = { 0x0FB50, 0x0FDFF, "Arabic Presentation Forms-A" }, +    ["arabicpresentationformsb"]             = { 0x0FE70, 0x0FEFF, "Arabic Presentation Forms-B" }, +    ["arabicsupplement"]                     = { 0x00750, 0x0077F, "Arabic Supplement" }, +    ["armenian"]                             = { 0x00530, 0x0058F, "Armenian" }, +    ["arrows"]                               = { 0x02190, 0x021FF, "Arrows" }, +    ["balinese"]                             = { 0x01B00, 0x01B7F, "Balinese" }, +    ["basiclatin"]                           = { 0x00000, 0x0007F, "Basic Latin" }, +    ["bengali"]                              = { 0x00980, 0x009FF, "Bengali" }, +    ["blockelements"]                        = { 0x02580, 0x0259F, "Block Elements" }, +    ["bopomofo"]                             = { 0x03100, 0x0312F, "Bopomofo" }, +    ["bopomofoextended"]                     = { 0x031A0, 0x031BF, "Bopomofo Extended" }, +    ["boxdrawing"]                           = { 0x02500, 0x0257F, "Box Drawing" }, +    ["braillepatterns"]                      = { 0x02800, 0x028FF, "Braille Patterns" }, +    ["buginese"]                             = { 0x01A00, 0x01A1F, "Buginese" }, +    ["buhid"]                                = { 0x01740, 0x0175F, "Buhid" }, +    ["byzantinemusicalsymbols"]              = { 0x1D000, 0x1D0FF, "Byzantine Musical Symbols" }, +    ["carian"]                               = { 0x102A0, 0x102DF, "Carian" }, +    ["cham"]                                 = { 0x0AA00, 0x0AA5F, "Cham" }, +    ["cherokee"]                             = { 0x013A0, 0x013FF, "Cherokee" }, +    ["cjkcompatibility"]                     = { 0x03300, 0x033FF, "CJK Compatibility" }, +    ["cjkcompatibilityforms"]                = { 0x0FE30, 0x0FE4F, "CJK Compatibility Forms" }, +    ["cjkcompatibilityideographs"]           = { 0x0F900, 0x0FAFF, "CJK Compatibility Ideographs" },      ["cjkcompatibilityideographssupplement"] = { 0x2F800, 0x2FA1F, "CJK Compatibility Ideographs Supplement" }, -    ["cjkradicalssupplement"] = { 0x2E80, 0x2EFF, "CJK Radicals Supplement" }, -    ["cjkstrokes"] = { 0x31C0, 0x31EF, "CJK Strokes" }, -    ["cjksymbolsandpunctuation"] = { 0x3000, 0x303F, "CJK Symbols and Punctuation" }, -    ["cjkunifiedideographs"] = { 0x4E00, 0x9FFF, "CJK Unified Ideographs" }, -    ["cjkunifiedideographsextensiona"] = { 0x3400, 0x4DBF, "CJK Unified Ideographs Extension A" }, -    ["cjkunifiedideographsextensionb"] = { 0x20000, 0x2A6DF, "CJK Unified Ideographs Extension B" }, -    ["combiningdiacriticalmarks"] = { 0x0300, 0x036F, "Combining Diacritical Marks" }, -    ["combiningdiacriticalmarksforsymbols"] = { 0x20D0, 0x20FF, "Combining Diacritical Marks for Symbols" }, -    ["combiningdiacriticalmarkssupplement"] = { 0x1DC0, 0x1DFF, "Combining Diacritical Marks Supplement" }, -    ["combininghalfmarks"] = { 0xFE20, 0xFE2F, "Combining Half Marks" }, -    ["controlpictures"] = { 0x2400, 0x243F, "Control Pictures" }, -    ["coptic"] = { 0x2C80, 0x2CFF, "Coptic" }, -    ["countingrodnumerals"] = { 0x1D360, 0x1D37F, "Counting Rod Numerals" }, -    ["cuneiform"] = { 0x12000, 0x123FF, "Cuneiform" }, -    ["cuneiformnumbersandpunctuation"] = { 0x12400, 0x1247F, "Cuneiform Numbers and Punctuation" }, -    ["currencysymbols"] = { 0x20A0, 0x20CF, "Currency Symbols" }, -    ["cypriotsyllabary"] = { 0x10800, 0x1083F, "Cypriot Syllabary" }, -    ["cyrillic"] = { 0x0400, 0x04FF, "Cyrillic" }, -    ["cyrillicextendeda"] = { 0x2DE0, 0x2DFF, "Cyrillic Extended-A" }, -    ["cyrillicextendedb"] = { 0xA640, 0xA69F, "Cyrillic Extended-B" }, -    ["cyrillicsupplement"] = { 0x0500, 0x052F, "Cyrillic Supplement" }, -    ["deseret"] = { 0x10400, 0x1044F, "Deseret" }, -    ["devanagari"] = { 0x0900, 0x097F, "Devanagari" }, -    ["dingbats"] = { 0x2700, 0x27BF, "Dingbats" }, -    ["dominotiles"] = { 0x1F030, 0x1F09F, "Domino Tiles" }, -    ["enclosedalphanumerics"] = { 0x2460, 0x24FF, "Enclosed Alphanumerics" }, -    ["enclosedcjklettersandmonths"] = { 0x3200, 0x32FF, "Enclosed CJK Letters and Months" }, -    ["ethiopic"] = { 0x1200, 0x137F, "Ethiopic" }, -    ["ethiopicextended"] = { 0x2D80, 0x2DDF, "Ethiopic Extended" }, -    ["ethiopicsupplement"] = { 0x1380, 0x139F, "Ethiopic Supplement" }, -    ["generalpunctuation"] = { 0x2000, 0x206F, "General Punctuation" }, -    ["geometricshapes"] = { 0x25A0, 0x25FF, "Geometric Shapes" }, -    ["georgian"] = { 0x10A0, 0x10FF, "Georgian" }, -    ["georgiansupplement"] = { 0x2D00, 0x2D2F, "Georgian Supplement" }, -    ["glagolitic"] = { 0x2C00, 0x2C5F, "Glagolitic" }, -    ["gothic"] = { 0x10330, 0x1034F, "Gothic" }, -    ["greekandcoptic"] = { 0x0370, 0x03FF, "Greek and Coptic" }, -    ["greekextended"] = { 0x1F00, 0x1FFF, "Greek Extended" }, -    ["gujarati"] = { 0x0A80, 0x0AFF, "Gujarati" }, -    ["gurmukhi"] = { 0x0A00, 0x0A7F, "Gurmukhi" }, -    ["halfwidthandfullwidthforms"] = { 0xFF00, 0xFFEF, "Halfwidth and Fullwidth Forms" }, -    ["hangulcompatibilityjamo"] = { 0x3130, 0x318F, "Hangul Compatibility Jamo" }, -    ["hanguljamo"] = { 0x1100, 0x11FF, "Hangul Jamo" }, -    ["hangulsyllables"] = { 0xAC00, 0xD7AF, "Hangul Syllables" }, -    ["hanunoo"] = { 0x1720, 0x173F, "Hanunoo" }, -    ["hebrew"] = { 0x0590, 0x05FF, "Hebrew" }, -    ["highprivateusesurrogates"] = { 0xDB80, 0xDBFF, "High Private Use Surrogates" }, -    ["highsurrogates"] = { 0xD800, 0xDB7F, "High Surrogates" }, -    ["hiragana"] = { 0x3040, 0x309F, "Hiragana" }, -    ["ideographicdescriptioncharacters"] = { 0x2FF0, 0x2FFF, "Ideographic Description Characters" }, -    ["ipaextensions"] = { 0x0250, 0x02AF, "IPA Extensions" }, -    ["kanbun"] = { 0x3190, 0x319F, "Kanbun" }, -    ["kangxiradicals"] = { 0x2F00, 0x2FDF, "Kangxi Radicals" }, -    ["kannada"] = { 0x0C80, 0x0CFF, "Kannada" }, -    ["katakana"] = { 0x30A0, 0x30FF, "Katakana" }, -    ["katakanaphoneticextensions"] = { 0x31F0, 0x31FF, "Katakana Phonetic Extensions" }, -    ["kayahli"] = { 0xA900, 0xA92F, "Kayah Li" }, -    ["kharoshthi"] = { 0x10A00, 0x10A5F, "Kharoshthi" }, -    ["khmer"] = { 0x1780, 0x17FF, "Khmer" }, -    ["khmersymbols"] = { 0x19E0, 0x19FF, "Khmer Symbols" }, -    ["lao"] = { 0x0E80, 0x0EFF, "Lao" }, -    ["latinextendeda"] = { 0x0100, 0x017F, "Latin Extended-A" }, -    ["latinextendedadditional"] = { 0x1E00, 0x1EFF, "Latin Extended Additional" }, -    ["latinextendedb"] = { 0x0180, 0x024F, "Latin Extended-B" }, -    ["latinextendedc"] = { 0x2C60, 0x2C7F, "Latin Extended-C" }, -    ["latinextendedd"] = { 0xA720, 0xA7FF, "Latin Extended-D" }, -    ["latinsupplement"] = { 0x0080, 0x00FF, "Latin-1 Supplement" }, -    ["lepcha"] = { 0x1C00, 0x1C4F, "Lepcha" }, -    ["letterlikesymbols"] = { 0x2100, 0x214F, "Letterlike Symbols" }, -    ["limbu"] = { 0x1900, 0x194F, "Limbu" }, -    ["linearbideograms"] = { 0x10080, 0x100FF, "Linear B Ideograms" }, -    ["linearbsyllabary"] = { 0x10000, 0x1007F, "Linear B Syllabary" }, -    ["lowsurrogates"] = { 0xDC00, 0xDFFF, "Low Surrogates" }, -    ["lycian"] = { 0x10280, 0x1029F, "Lycian" }, -    ["lydian"] = { 0x10920, 0x1093F, "Lydian" }, -    ["mahjongtiles"] = { 0x1F000, 0x1F02F, "Mahjong Tiles" }, -    ["malayalam"] = { 0x0D00, 0x0D7F, "Malayalam" }, -    ["mathematicalalphanumericsymbols"] = { 0x1D400, 0x1D7FF, "Mathematical Alphanumeric Symbols" }, -    ["mathematicaloperators"] = { 0x2200, 0x22FF, "Mathematical Operators" }, -    ["miscellaneousmathematicalsymbolsa"] = { 0x27C0, 0x27EF, "Miscellaneous Mathematical Symbols-A" }, -    ["miscellaneousmathematicalsymbolsb"] = { 0x2980, 0x29FF, "Miscellaneous Mathematical Symbols-B" }, -    ["miscellaneoussymbols"] = { 0x2600, 0x26FF, "Miscellaneous Symbols" }, -    ["miscellaneoussymbolsandarrows"] = { 0x2B00, 0x2BFF, "Miscellaneous Symbols and Arrows" }, -    ["miscellaneoustechnical"] = { 0x2300, 0x23FF, "Miscellaneous Technical" }, -    ["modifiertoneletters"] = { 0xA700, 0xA71F, "Modifier Tone Letters" }, -    ["mongolian"] = { 0x1800, 0x18AF, "Mongolian" }, -    ["musicalsymbols"] = { 0x1D100, 0x1D1FF, "Musical Symbols" }, -    ["myanmar"] = { 0x1000, 0x109F, "Myanmar" }, -    ["newtailue"] = { 0x1980, 0x19DF, "New Tai Lue" }, -    ["nko"] = { 0x07C0, 0x07FF, "NKo" }, -    ["numberforms"] = { 0x2150, 0x218F, "Number Forms" }, -    ["ogham"] = { 0x1680, 0x169F, "Ogham" }, -    ["olchiki"] = { 0x1C50, 0x1C7F, "Ol Chiki" }, -    ["olditalic"] = { 0x10300, 0x1032F, "Old Italic" }, -    ["oldpersian"] = { 0x103A0, 0x103DF, "Old Persian" }, -    ["opticalcharacterrecognition"] = { 0x2440, 0x245F, "Optical Character Recognition" }, -    ["oriya"] = { 0x0B00, 0x0B7F, "Oriya" }, -    ["osmanya"] = { 0x10480, 0x104AF, "Osmanya" }, -    ["phagspa"] = { 0xA840, 0xA87F, "Phags-pa" }, -    ["phaistosdisc"] = { 0x101D0, 0x101FF, "Phaistos Disc" }, -    ["phoenician"] = { 0x10900, 0x1091F, "Phoenician" }, -    ["phoneticextensions"] = { 0x1D00, 0x1D7F, "Phonetic Extensions" }, -    ["phoneticextensionssupplement"] = { 0x1D80, 0x1DBF, "Phonetic Extensions Supplement" }, -    ["privateusearea"] = { 0xE000, 0xF8FF, "Private Use Area" }, -    ["rejang"] = { 0xA930, 0xA95F, "Rejang" }, -    ["runic"] = { 0x16A0, 0x16FF, "Runic" }, -    ["saurashtra"] = { 0xA880, 0xA8DF, "Saurashtra" }, -    ["shavian"] = { 0x10450, 0x1047F, "Shavian" }, -    ["sinhala"] = { 0x0D80, 0x0DFF, "Sinhala" }, -    ["smallformvariants"] = { 0xFE50, 0xFE6F, "Small Form Variants" }, -    ["spacingmodifierletters"] = { 0x02B0, 0x02FF, "Spacing Modifier Letters" }, -    ["specials"] = { 0xFFF0, 0xFFFF, "Specials" }, -    ["sundanese"] = { 0x1B80, 0x1BBF, "Sundanese" }, -    ["superscriptsandsubscripts"] = { 0x2070, 0x209F, "Superscripts and Subscripts" }, -    ["supplementalarrowsa"] = { 0x27F0, 0x27FF, "Supplemental Arrows-A" }, -    ["supplementalarrowsb"] = { 0x2900, 0x297F, "Supplemental Arrows-B" }, -    ["supplementalmathematicaloperators"] = { 0x2A00, 0x2AFF, "Supplemental Mathematical Operators" }, -    ["supplementalpunctuation"] = { 0x2E00, 0x2E7F, "Supplemental Punctuation" }, -    ["supplementaryprivateuseareaa"] = { 0xF0000, 0xFFFFF, "Supplementary Private Use Area-A" }, -    ["supplementaryprivateuseareab"] = { 0x100000, 0x10FFFF, "Supplementary Private Use Area-B" }, -    ["sylotinagri"] = { 0xA800, 0xA82F, "Syloti Nagri" }, -    ["syriac"] = { 0x0700, 0x074F, "Syriac" }, -    ["tagalog"] = { 0x1700, 0x171F, "Tagalog" }, -    ["tagbanwa"] = { 0x1760, 0x177F, "Tagbanwa" }, -    ["tags"] = { 0xE0000, 0xE007F, "Tags" }, -    ["taile"] = { 0x1950, 0x197F, "Tai Le" }, -    ["taixuanjingsymbols"] = { 0x1D300, 0x1D35F, "Tai Xuan Jing Symbols" }, -    ["tamil"] = { 0x0B80, 0x0BFF, "Tamil" }, -    ["telugu"] = { 0x0C00, 0x0C7F, "Telugu" }, -    ["thaana"] = { 0x0780, 0x07BF, "Thaana" }, -    ["thai"] = { 0x0E00, 0x0E7F, "Thai" }, -    ["tibetan"] = { 0x0F00, 0x0FFF, "Tibetan" }, -    ["tifinagh"] = { 0x2D30, 0x2D7F, "Tifinagh" }, -    ["ugaritic"] = { 0x10380, 0x1039F, "Ugaritic" }, -    ["unifiedcanadianaboriginalsyllabics"] = { 0x1400, 0x167F, "Unified Canadian Aboriginal Syllabics" }, -    ["vai"] = { 0xA500, 0xA63F, "Vai" }, -    ["variationselectors"] = { 0xFE00, 0xFE0F, "Variation Selectors" }, -    ["variationselectorssupplement"] = { 0xE0100, 0xE01EF, "Variation Selectors Supplement" }, -    ["verticalforms"] = { 0xFE10, 0xFE1F, "Vertical Forms" }, -    ["yijinghexagramsymbols"] = { 0x4DC0, 0x4DFF, "Yijing Hexagram Symbols" }, -    ["yiradicals"] = { 0xA490, 0xA4CF, "Yi Radicals" }, -    ["yisyllables"] = { 0xA000, 0xA48F, "Yi Syllables" }, +    ["cjkradicalssupplement"]                = { 0x02E80, 0x02EFF, "CJK Radicals Supplement" }, +    ["cjkstrokes"]                           = { 0x031C0, 0x031EF, "CJK Strokes" }, +    ["cjksymbolsandpunctuation"]             = { 0x03000, 0x0303F, "CJK Symbols and Punctuation" }, +    ["cjkunifiedideographs"]                 = { 0x04E00, 0x09FFF, "CJK Unified Ideographs" }, +    ["cjkunifiedideographsextensiona"]       = { 0x03400, 0x04DBF, "CJK Unified Ideographs Extension A" }, +    ["cjkunifiedideographsextensionb"]       = { 0x20000, 0x2A6DF, "CJK Unified Ideographs Extension B" }, +    ["combiningdiacriticalmarks"]            = { 0x00300, 0x0036F, "Combining Diacritical Marks" }, +    ["combiningdiacriticalmarksforsymbols"]  = { 0x020D0, 0x020FF, "Combining Diacritical Marks for Symbols" }, +    ["combiningdiacriticalmarkssupplement"]  = { 0x01DC0, 0x01DFF, "Combining Diacritical Marks Supplement" }, +    ["combininghalfmarks"]                   = { 0x0FE20, 0x0FE2F, "Combining Half Marks" }, +    ["controlpictures"]                      = { 0x02400, 0x0243F, "Control Pictures" }, +    ["coptic"]                               = { 0x02C80, 0x02CFF, "Coptic" }, +    ["countingrodnumerals"]                  = { 0x1D360, 0x1D37F, "Counting Rod Numerals" }, +    ["cuneiform"]                            = { 0x12000, 0x123FF, "Cuneiform" }, +    ["cuneiformnumbersandpunctuation"]       = { 0x12400, 0x1247F, "Cuneiform Numbers and Punctuation" }, +    ["currencysymbols"]                      = { 0x020A0, 0x020CF, "Currency Symbols" }, +    ["cypriotsyllabary"]                     = { 0x10800, 0x1083F, "Cypriot Syllabary" }, +    ["cyrillic"]                             = { 0x00400, 0x004FF, "Cyrillic" }, +    ["cyrillicextendeda"]                    = { 0x02DE0, 0x02DFF, "Cyrillic Extended-A" }, +    ["cyrillicextendedb"]                    = { 0x0A640, 0x0A69F, "Cyrillic Extended-B" }, +    ["cyrillicsupplement"]                   = { 0x00500, 0x0052F, "Cyrillic Supplement" }, +    ["deseret"]                              = { 0x10400, 0x1044F, "Deseret" }, +    ["devanagari"]                           = { 0x00900, 0x0097F, "Devanagari" }, +    ["dingbats"]                             = { 0x02700, 0x027BF, "Dingbats" }, +    ["dominotiles"]                          = { 0x1F030, 0x1F09F, "Domino Tiles" }, +    ["enclosedalphanumerics"]                = { 0x02460, 0x024FF, "Enclosed Alphanumerics" }, +    ["enclosedcjklettersandmonths"]          = { 0x03200, 0x032FF, "Enclosed CJK Letters and Months" }, +    ["ethiopic"]                             = { 0x01200, 0x0137F, "Ethiopic" }, +    ["ethiopicextended"]                     = { 0x02D80, 0x02DDF, "Ethiopic Extended" }, +    ["ethiopicsupplement"]                   = { 0x01380, 0x0139F, "Ethiopic Supplement" }, +    ["generalpunctuation"]                   = { 0x02000, 0x0206F, "General Punctuation" }, +    ["geometricshapes"]                      = { 0x025A0, 0x025FF, "Geometric Shapes" }, +    ["georgian"]                             = { 0x010A0, 0x010FF, "Georgian" }, +    ["georgiansupplement"]                   = { 0x02D00, 0x02D2F, "Georgian Supplement" }, +    ["glagolitic"]                           = { 0x02C00, 0x02C5F, "Glagolitic" }, +    ["gothic"]                               = { 0x10330, 0x1034F, "Gothic" }, +    ["greekandcoptic"]                       = { 0x00370, 0x003FF, "Greek and Coptic" }, +    ["greekextended"]                        = { 0x01F00, 0x01FFF, "Greek Extended" }, +    ["gujarati"]                             = { 0x00A80, 0x00AFF, "Gujarati" }, +    ["gurmukhi"]                             = { 0x00A00, 0x00A7F, "Gurmukhi" }, +    ["halfwidthandfullwidthforms"]           = { 0x0FF00, 0x0FFEF, "Halfwidth and Fullwidth Forms" }, +    ["hangulcompatibilityjamo"]              = { 0x03130, 0x0318F, "Hangul Compatibility Jamo" }, +    ["hanguljamo"]                           = { 0x01100, 0x011FF, "Hangul Jamo" }, +    ["hangulsyllables"]                      = { 0x0AC00, 0x0D7AF, "Hangul Syllables" }, +    ["hanunoo"]                              = { 0x01720, 0x0173F, "Hanunoo" }, +    ["hebrew"]                               = { 0x00590, 0x005FF, "Hebrew" }, +    ["highprivateusesurrogates"]             = { 0x0DB80, 0x0DBFF, "High Private Use Surrogates" }, +    ["highsurrogates"]                       = { 0x0D800, 0x0DB7F, "High Surrogates" }, +    ["hiragana"]                             = { 0x03040, 0x0309F, "Hiragana" }, +    ["ideographicdescriptioncharacters"]     = { 0x02FF0, 0x02FFF, "Ideographic Description Characters" }, +    ["ipaextensions"]                        = { 0x00250, 0x02AF, "IPA Extensions" }, +    ["kanbun"]                               = { 0x03190, 0x0319F, "Kanbun" }, +    ["kangxiradicals"]                       = { 0x02F00, 0x02FDF, "Kangxi Radicals" }, +    ["kannada"]                              = { 0x00C80, 0x00CFF, "Kannada" }, +    ["katakana"]                             = { 0x030A0, 0x030FF, "Katakana" }, +    ["katakanaphoneticextensions"]           = { 0x031F0, 0x031FF, "Katakana Phonetic Extensions" }, +    ["kayahli"]                              = { 0x0A900, 0x0A92F, "Kayah Li" }, +    ["kharoshthi"]                           = { 0x10A00, 0x10A5F, "Kharoshthi" }, +    ["khmer"]                                = { 0x01780, 0x017FF, "Khmer" }, +    ["khmersymbols"]                         = { 0x019E0, 0x019FF, "Khmer Symbols" }, +    ["lao"]                                  = { 0x00E80, 0x00EFF, "Lao" }, +    ["latinextendeda"]                       = { 0x00100, 0x0017F, "Latin Extended-A" }, +    ["latinextendedadditional"]              = { 0x01E00, 0x01EFF, "Latin Extended Additional" }, +    ["latinextendedb"]                       = { 0x00180, 0x0024F, "Latin Extended-B" }, +    ["latinextendedc"]                       = { 0x02C60, 0x02C7F, "Latin Extended-C" }, +    ["latinextendedd"]                       = { 0x0A720, 0x0A7FF, "Latin Extended-D" }, +    ["latinsupplement"]                      = { 0x00080, 0x000FF, "Latin-1 Supplement" }, +    ["lepcha"]                               = { 0x01C00, 0x01C4F, "Lepcha" }, +    ["letterlikesymbols"]                    = { 0x02100, 0x0214F, "Letterlike Symbols" }, +    ["limbu"]                                = { 0x01900, 0x0194F, "Limbu" }, +    ["linearbideograms"]                     = { 0x10080, 0x100FF, "Linear B Ideograms" }, +    ["linearbsyllabary"]                     = { 0x10000, 0x1007F, "Linear B Syllabary" }, +    ["lowsurrogates"]                        = { 0x0DC00, 0x0DFFF, "Low Surrogates" }, +    ["lycian"]                               = { 0x10280, 0x1029F, "Lycian" }, +    ["lydian"]                               = { 0x10920, 0x1093F, "Lydian" }, +    ["mahjongtiles"]                         = { 0x1F000, 0x1F02F, "Mahjong Tiles" }, +    ["malayalam"]                            = { 0x00D00, 0x00D7F, "Malayalam" }, +    ["mathematicalalphanumericsymbols"]      = { 0x1D400, 0x1D7FF, "Mathematical Alphanumeric Symbols" }, +    ["mathematicaloperators"]                = { 0x02200, 0x022FF, "Mathematical Operators" }, +    ["miscellaneousmathematicalsymbolsa"]    = { 0x027C0, 0x027EF, "Miscellaneous Mathematical Symbols-A" }, +    ["miscellaneousmathematicalsymbolsb"]    = { 0x02980, 0x029FF, "Miscellaneous Mathematical Symbols-B" }, +    ["miscellaneoussymbols"]                 = { 0x02600, 0x026FF, "Miscellaneous Symbols" }, +    ["miscellaneoussymbolsandarrows"]        = { 0x02B00, 0x02BFF, "Miscellaneous Symbols and Arrows" }, +    ["miscellaneoustechnical"]               = { 0x02300, 0x023FF, "Miscellaneous Technical" }, +    ["modifiertoneletters"]                  = { 0x0A700, 0x0A71F, "Modifier Tone Letters" }, +    ["mongolian"]                            = { 0x01800, 0x018AF, "Mongolian" }, +    ["musicalsymbols"]                       = { 0x1D100, 0x1D1FF, "Musical Symbols" }, +    ["myanmar"]                              = { 0x01000, 0x0109F, "Myanmar" }, +    ["newtailue"]                            = { 0x01980, 0x019DF, "New Tai Lue" }, +    ["nko"]                                  = { 0x007C0, 0x007FF, "NKo" }, +    ["numberforms"]                          = { 0x02150, 0x0218F, "Number Forms" }, +    ["ogham"]                                = { 0x01680, 0x0169F, "Ogham" }, +    ["olchiki"]                              = { 0x01C50, 0x01C7F, "Ol Chiki" }, +    ["olditalic"]                            = { 0x10300, 0x1032F, "Old Italic" }, +    ["oldpersian"]                           = { 0x103A0, 0x103DF, "Old Persian" }, +    ["opticalcharacterrecognition"]          = { 0x02440, 0x0245F, "Optical Character Recognition" }, +    ["oriya"]                                = { 0x00B00, 0x00B7F, "Oriya" }, +    ["osmanya"]                              = { 0x10480, 0x104AF, "Osmanya" }, +    ["phagspa"]                              = { 0x0A840, 0x0A87F, "Phags-pa" }, +    ["phaistosdisc"]                         = { 0x101D0, 0x101FF, "Phaistos Disc" }, +    ["phoenician"]                           = { 0x10900, 0x1091F, "Phoenician" }, +    ["phoneticextensions"]                   = { 0x01D00, 0x01D7F, "Phonetic Extensions" }, +    ["phoneticextensionssupplement"]         = { 0x01D80, 0x01DBF, "Phonetic Extensions Supplement" }, +    ["privateusearea"]                       = { 0x0E000, 0x0F8FF, "Private Use Area" }, +    ["rejang"]                               = { 0x0A930, 0x0A95F, "Rejang" }, +    ["runic"]                                = { 0x016A0, 0x016FF, "Runic" }, +    ["saurashtra"]                           = { 0x0A880, 0x0A8DF, "Saurashtra" }, +    ["shavian"]                              = { 0x10450, 0x1047F, "Shavian" }, +    ["sinhala"]                              = { 0x00D80, 0x00DFF, "Sinhala" }, +    ["smallformvariants"]                    = { 0x0FE50, 0x0FE6F, "Small Form Variants" }, +    ["spacingmodifierletters"]               = { 0x002B0, 0x002FF, "Spacing Modifier Letters" }, +    ["specials"]                             = { 0x0FFF0, 0x0FFFF, "Specials" }, +    ["sundanese"]                            = { 0x01B80, 0x01BBF, "Sundanese" }, +    ["superscriptsandsubscripts"]            = { 0x02070, 0x0209F, "Superscripts and Subscripts" }, +    ["supplementalarrowsa"]                  = { 0x027F0, 0x027FF, "Supplemental Arrows-A" }, +    ["supplementalarrowsb"]                  = { 0x02900, 0x0297F, "Supplemental Arrows-B" }, +    ["supplementalmathematicaloperators"]    = { 0x02A00, 0x02AFF, "Supplemental Mathematical Operators" }, +    ["supplementalpunctuation"]              = { 0x02E00, 0x02E7F, "Supplemental Punctuation" }, +    ["supplementaryprivateuseareaa"]         = { 0xF0000, 0xFFFFF, "Supplementary Private Use Area-A" }, +    ["supplementaryprivateuseareab"]         = { 0x100000,0x10FFFF,"Supplementary Private Use Area-B" }, +    ["sylotinagri"]                          = { 0x0A800, 0x0A82F, "Syloti Nagri" }, +    ["syriac"]                               = { 0x00700, 0x0074F, "Syriac" }, +    ["tagalog"]                              = { 0x01700, 0x0171F, "Tagalog" }, +    ["tagbanwa"]                             = { 0x01760, 0x0177F, "Tagbanwa" }, +    ["tags"]                                 = { 0xE0000, 0xE007F, "Tags" }, +    ["taile"]                                = { 0x01950, 0x0197F, "Tai Le" }, +    ["taixuanjingsymbols"]                   = { 0x1D300, 0x1D35F, "Tai Xuan Jing Symbols" }, +    ["tamil"]                                = { 0x00B80, 0x00BFF, "Tamil" }, +    ["telugu"]                               = { 0x00C00, 0x00C7F, "Telugu" }, +    ["thaana"]                               = { 0x00780, 0x007BF, "Thaana" }, +    ["thai"]                                 = { 0x00E00, 0x00E7F, "Thai" }, +    ["tibetan"]                              = { 0x00F00, 0x00FFF, "Tibetan" }, +    ["tifinagh"]                             = { 0x02D30, 0x02D7F, "Tifinagh" }, +    ["ugaritic"]                             = { 0x10380, 0x1039F, "Ugaritic" }, +    ["unifiedcanadianaboriginalsyllabics"]   = { 0x01400, 0x0167F, "Unified Canadian Aboriginal Syllabics" }, +    ["vai"]                                  = { 0x0A500, 0x0A63F, "Vai" }, +    ["variationselectors"]                   = { 0x0FE00, 0x0FE0F, "Variation Selectors" }, +    ["variationselectorssupplement"]         = { 0xE0100, 0xE01EF, "Variation Selectors Supplement" }, +    ["verticalforms"]                        = { 0x0FE10, 0x0FE1F, "Vertical Forms" }, +    ["yijinghexagramsymbols"]                = { 0x04DC0, 0x04DFF, "Yijing Hexagram Symbols" }, +    ["yiradicals"]                           = { 0x0A490, 0x0A4CF, "Yi Radicals" }, +    ["yisyllables"]                          = { 0x0A000, 0x0A48F, "Yi Syllables" },  }  function characters.getrange(name) -    local tag = name:lower() -    tag = name:gsub("[^a-z]", "") +    local tag = lower(name) +    tag = gsub(name,"[^a-z]", "")      local range = characters.blocks[tag]      if range then          return range[1], range[2], range[3]      end -    name = name:gsub('"',"0x") -- goodie: tex hex notation -    local start, stop = name:match("^(.-)[%-%:](.-)$") +    name = gsub(name,'"',"0x") -- goodie: tex hex notation +    local start, stop = match(name,"^(.-)[%-%:](.-)$")      if start and stop then          start, stop = tonumber(start,16) or tonumber(start), tonumber(stop,16) or tonumber(stop)          if start and stop then @@ -252,7 +288,16 @@ characters.categories = {  --~ characters: ll lm lo lt lu mn nl no pc pd pe pf pi po ps sc sk sm so  characters.is_character = table.tohash { -    "ll","lm","lo","lt","lu","mn","nl","no","pc","pd","pe","pf","pi","po","ps","sc","sk","sm","so" +    "lu","ll","lt","lm","lo", +    "nd","nl","no", +    "mn", +    "nl","no", +    "pc","pd","ps","pe","pi","pf","po", +    "sm","sc","sk","so" +} + +characters.is_letter = table.tohash { +    "ll","lm","lo","lt","lu"  }  characters.is_command = table.tohash { @@ -296,34 +341,23 @@ function table.set_empty_metatable(t)      setmetatable(t,_empty_table_)  end -table.set_empty_metatable(characters.data) +table.set_empty_metatable(data)  --[[ldx--  <p>At this point we assume that the big data table is loaded. From this  table we derive a few more.</p>  --ldx]]-- --- used ? - -characters.unicodes   = characters.unicodes   or { } -characters.utfcodes   = characters.utfcodes   or { } -characters.enccodes   = characters.enccodes   or { } -characters.fallbacks  = characters.fallbacks  or { } -characters.directions = characters.directions or { } - -function characters.context.rehash() -    local unicodes, utfcodes, enccodes, fallbacks, directions = characters.unicodes, characters.utfcodes, characters.enccodes, characters.fallbacks, characters.directions -    for k,v in pairs(characters.data) do -        local contextname, adobename, specials = v.contextname, v.adobename, v.specials -        if contextname then -            local slot = v.unicodeslot -            unicodes[contextname] = slot -            utfcodes[contextname] = utfchar(slot) -        end -        local encname = adobename or contextname -        if encname then -            enccodes[encname] = k -        end +if not characters.fallbacks then + +    characters.fallbacks   = { } +    characters.directions  = { } + +    local fallbacks  = characters.fallbacks +    local directions = characters.directions + +    for k,v in next, data do +        local specials = v.specials          if specials and specials[1] == "compat" and specials[2] == 0x0020 and specials[3] then              local s = specials[3]              fallbacks[k] = s @@ -331,101 +365,92 @@ function characters.context.rehash()          end          directions[k] = v.direction      end -    for name,code in pairs(characters.synonyms) do -        if not enccodes[name] then enccodes[name] = code end -    end -end --- maybe some day, no significate speed up now +end ---~ input.storage.register(false, "characters.unicodes", characters.unicodes, "characters.unicodes") ---~ input.storage.register(false, "characters.utfcodes", characters.utfcodes, "characters.utfcodes") ---~ input.storage.register(false, "characters.enccodes", characters.enccodes, "characters.enccodes") ---~ input.storage.register(false, "characters.fallbacks", characters.fallbacks, "characters.fallbacks") ---~ input.storage.register(false, "characters.directions", characters.directions, "characters.directions") +storage.register("characters.fallbacks", characters.fallbacks, "characters.fallbacks") +storage.register("characters.directions", characters.directions, "characters.directions")  --[[ldx--  <p>The <type>context</type> namespace is used to store methods and data  which is rather specific to <l n='context'/>.</p>  --ldx]]-- -function characters.context.show(n) -    local n = characters.number(n) -    local d = characters.data[n] -    if d then -        local function entry(label,name) -            texsprint(tex.ctxcatcodes,format("\\NC %s\\NC %s\\NC\\NR",label,characters.valid(d[name]))) -        end -        texsprint(tex.ctxcatcodes,"\\starttabulate[|Tl|Tl|]") -        entry("unicode index" , "unicodeslot") -        entry("context name"  , "contextname") -        entry("adobe name"    , "adobename") -        entry("category"      , "category") -        entry("description"   , "description") -        entry("uppercase code", "uccode") -        entry("lowercase code", "lccode") -        entry("specials"      , "specials") -        texsprint(tex.ctxcatcodes,"\\stoptabulate ") -    end -end -  --[[ldx--  <p>Instead of using a <l n='tex'/> file to define the named glyphs, we  use the table. After all, we have this information available anyway.</p>  --ldx]]--  function characters.makeactive(n,name) -- let ? -    texsprint(tex.ctxcatcodes,format("\\catcode%s=13\\unexpanded\\def %s{\\%s}",n,utfchar(n),name)) +    texsprint(ctxcatcodes,format("\\catcode%s=13\\unexpanded\\def %s{\\%s}",n,utfchar(n),name))  end  function tex.uprint(n) -    texsprint(tex.ctxcatcodes,utfchar(n)) +    texsprint(ctxcatcodes,utfchar(n))  end -function characters.context.define(tobelettered, tobeactivated) -    local unicodes, utfcodes = characters.unicodes, characters.utfcodes -    local tc = tex.ctxcatcodes -    local is_character, is_command = characters.is_character, characters.is_command +local template_a = "\\startextendcatcodetable{%s}\\chardef\\l=11\\chardef\\a=13\\let\\c\\catcode%s\\let\\a\\undefined\\let\\l\\undefined\\let\\c\\undefined\\stopextendcatcodetable" +local template_b = "\\chardef\\l=11\\chardef\\a=13\\let\\c\\catcode%s\\let\\a\\undefined\\let\\l\\undefined\\let\\c\\undefined" + +-- we need a function for setting the codes .... + +function characters.define(tobelettered, tobeactivated) -- catcodetables +    local is_character, is_command, is_letter = characters.is_character, characters.is_command, characters.is_letter      local lettered, activated = { }, { } -    for u, chr in pairs(characters.data) do +    for u, chr in next, data do +        -- we can use a macro instead of direct settings          local fallback = chr.fallback          if fallback then -            texsprint("{\\catcode"..u.."=13\\unexpanded\\gdef "..utfchar(u).."{\\checkedchar{"..u.."}{"..fallback.."}}}") -            activated[#activated+1] = "\\c"..u.."=".."13" +            texprint("{\\catcode",u,"=13\\unexpanded\\gdef ",utfchar(u),"{\\checkedchar{",u,"}{",fallback,"}}}") +            activated[#activated+1] = "\\c"..u.."\\a"          else              local contextname = chr.contextname              local category = chr.category              if contextname then                  if is_character[category] then                   -- by this time, we're still in normal catcode mode +                 -- subtle: not "\\",contextname but "\\"..contextname                      if chr.unicodeslot < 128 then -                        texsprint(tc, "\\chardef\\" .. contextname .. "=" .. u) -- unicodes[contextname]) +                        texprint(ctxcatcodes, "\\chardef\\"..contextname,"=",u)                      else -                        texsprint(tc, "\\let\\" .. contextname .. "=" .. utfchar(u)) -- utfcodes[contextname]) -                        lettered[#lettered+1] = "\\c"..u.."=".."11" +                        texprint(ctxcatcodes, "\\let\\"..contextname,"=",utfchar(u)) +                        if is_letter[category] then +                            lettered[#lettered+1] = "\\c"..u.."\\l" +                        end                      end                  elseif is_command[category] then -                    texsprint("{\\catcode"..u.."=13\\unexpanded\\gdef "..utfchar(u).."{\\"..contextname.."}}") -                    activated[#activated+1] = "\\c"..u.."=".."13" +                    texprint("{\\catcode",u,"=13\\unexpanded\\gdef ",utfchar(u),"{\\"..contextname,"}}") +                    activated[#activated+1] = "\\c"..u.."\\a"                  end -            else -                if is_character[category] then -                    if u >= 128 and u <= 65536 then -                        lettered[#lettered+1] = "\\c"..u.."=".."11" -                    end +            elseif is_letter[category] then +                if u >= 128 and u <= 65536 then -- catch private mess +                    lettered[#lettered+1] = "\\c"..u.."\\l"                  end              end          end +        if chr.range then +            lettered[#lettered+1] = format('\\dofastrecurse{"%05X}{"%05X}{1}{\\c\\fastrecursecounter\\l}',u,chr.range) +        end      end -    lettered[#lettered+1] = "\\c"..0x200C.."=".."11" -- non-joiner -    lettered[#lettered+1] = "\\c"..0x200D.."=".."11" -- joiner -    lettered = concat(lettered) -    for _, i in ipairs(tobelettered or { }) do -        texsprint(tc,format("\\startextendcatcodetable{%s}\\let\\c\\catcode%s\\stopextendcatcodetable",i,lettered)) + -- if false then +    lettered[#lettered+1] = "\\c"..0x200C.."\\l" -- non-joiner +    lettered[#lettered+1] = "\\c"..0x200D.."\\l" -- joiner + -- fi +    if tobelettered then +        lettered = concat(lettered) +        if true then +            texsprint(ctxcatcodes,format(template_b,lettered)) +        else +            for l=1,#tobelettered do +                texsprint(ctxcatcodes,format(template_a,tobelettered[l],lettered)) +            end +        end      end -    activated = concat(activated) -    for _, i in ipairs(tobeactivated or { } ) do -        texsprint(tc,format("\\startextendcatcodetable{%s}\\let\\c\\catcode%s\\stopextendcatcodetable",i,activated)) +    if tobeactivated then +        activated = concat(activated) +        for a=1,#tobeactivated do +            texsprint(ctxcatcodes,format(template_a,tobeactivated[a],activated)) +        end      end  end @@ -439,15 +464,22 @@ end  <p>Setting the lccodes is also done in a loop over the data table.</p>  --ldx]]-- +-- we need a function ... +  function characters.setcodes() -    local tc = tex.ctxcatcodes -    for code, chr in pairs(characters.data) do +    for code, chr in next, data do          local cc = chr.category          if cc == 'll' or cc == 'lu' or cc == 'lt' then              local lc, uc = chr.lccode, chr.uccode              if not lc then chr.lccode, lc = code, code end              if not uc then chr.uccode, uc = code, code end -            texsprint(tc, format("\\setcclcuc %i %i %i ",code,lc,uc)) +            texsprint(ctxcatcodes,format("\\setcclcuc{%i}{%i}{%i}",code,lc,uc)) +        end +        if cc == "lu" then +            texprint(ctxcatcodes,"\\sfcode ",code,"999 ") +        end +        if cc == "lo" and chr.range then +            texsprint(ctxcatcodes,format('\\dofastrecurse{"%05X}{"%05X}{1}{\\setcclcucself\\fastrecursecounter}',code,chr.range))          end      end  end @@ -480,21 +512,12 @@ end  characters.valid = characters.is_valid  --[[ldx-- -<p>The next method is used when constructing the main table, although nowadays -we do this in one step. The index can be a string or a number.</p> ---ldx]]-- - -function characters.define(c) -    characters.data[characters.number(c.unicodeslot)] = c -end - ---[[ldx--  <p></p>  --ldx]]--  -- set a table entry; index is number (can be different from unicodeslot)  function characters.set(n, c) -    characters.data[characters.number(n)] = c +    data[characters.number(n)] = c  end  --[[ldx-- @@ -503,7 +526,7 @@ can be different (not likely).</p>  --ldx]]--  function characters.get(n) -    return characters.data[characters.number(n)] +    return data[characters.number(n)]  end  --[[ldx-- @@ -512,43 +535,43 @@ to the checking.</p>  --ldx]]--  function characters.hexindex(n) -    return format("%04X", characters.valid(characters.data[characters.number(n)].unicodeslot)) +    return format("%04X", characters.valid(data[characters.number(n)].unicodeslot))  end  function characters.contextname(n) -    return characters.valid(characters.data[characters.number(n)].contextname) +    return characters.valid(data[characters.number(n)].contextname)  end  function characters.adobename(n) -    return characters.valid(characters.data[characters.number(n)].adobename) +    return characters.valid(data[characters.number(n)].adobename)  end  function characters.description(n) -    return characters.valid(characters.data[characters.number(n)].description) +    return characters.valid(data[characters.number(n)].description)  end  function characters.category(n) -    return characters.valid(characters.data[characters.number(n)].category) +    return characters.valid(data[characters.number(n)].category)  end  --[[ldx--  <p>Requesting lower and uppercase codes:</p>  --ldx]]-- -function characters.uccode(n) return characters.data[n].uccode or n end -function characters.lccode(n) return characters.data[n].lccode or n end +function characters.uccode(n) return data[n].uccode or n end +function characters.lccode(n) return data[n].lccode or n end  function characters.flush(n) -    local c = characters.data[n] +    local c = data[n]      if c and c.contextname then -        texsprint(tex.texcatcodes, "\\"..c.contextname) +        texsprint(texcatcodes, "\\"..c.contextname)      else -        texsprint(unicode.utf8.char(n)) +        texsprint(utfchar(n))      end  end  function characters.shape(n) -    local shcode = characters.data[n].shcode +    local shcode = data[n].shcode      if not shcode then          return n, nil      elseif type(shcode) == "table" then @@ -564,43 +587,29 @@ end  function characters.is_of_category(token,category)      if type(token) == "string" then -        return characters.data[utfbyte(token)].category == category +        return data[utfbyte(token)].category == category      else -        return characters.data[token].category == category +        return data[token].category == category      end  end  function characters.i_is_of_category(i,category) -- by index (number) -    local cd = characters.data[i] +    local cd = data[i]      return cd and cd.category == category  end  function characters.n_is_of_category(n,category) -- by name (string) -    local cd = characters.data[utfbyte(n)] +    local cd = data[utfbyte(n)]      return cd and cd.category == category  end ---[[ldx-- -<p>The following code is kind of messy. It is used to generate the right -unicode reference tables.</p> ---ldx]]-- - -function characters.setpdfunicodes() ---~     local tc = tex.ctxcatcodes ---~     for _,v in pairs(characters.data) do ---~         if v.adobename then ---~             texsprint(tc,format("\\pdfglyphtounicode{%s}{%04X}", v.adobename, v.unicodeslot)) ---~         end ---~     end -end -  -- xml support  characters.active_offset = 0x10000  xml.entities = xml.entities or { } -input.storage.register(false,"xml/entities",xml.entities,"xml.entities") -- this will move to lxml +storage.register("xml/entities",xml.entities,"xml.entities") -- this will move to lxml  function characters.remapentity(chr,slot)      texsprint(format("{\\catcode%s=13\\xdef%s{\\string%s}}",slot,utfchar(slot),chr)) | 
