From 7b271baae19db1528fbe6621bdf50af89a5a336b Mon Sep 17 00:00:00 2001 From: Hans Hagen Date: Fri, 22 Feb 2019 20:29:46 +0100 Subject: 2019-02-22 19:43:00 --- tex/context/base/mkiv/char-ini.lua | 60 ++++++++++++++++++++++++++------------ 1 file changed, 41 insertions(+), 19 deletions(-) (limited to 'tex/context/base/mkiv/char-ini.lua') diff --git a/tex/context/base/mkiv/char-ini.lua b/tex/context/base/mkiv/char-ini.lua index c308a2c0f..fb9d9f126 100644 --- a/tex/context/base/mkiv/char-ini.lua +++ b/tex/context/base/mkiv/char-ini.lua @@ -234,6 +234,7 @@ local blocks = allocate { ["cham"] = { first = 0x0AA00, last = 0x0AA5F, description = "Cham" }, ["cherokee"] = { first = 0x013A0, last = 0x013FF, otf="cher", description = "Cherokee" }, ["cherokeesupplement"] = { first = 0x0AB70, last = 0x0ABBF, description = "Cherokee Supplement" }, + ["chesssymbols"] = { first = 0x1FA00, last = 0x1FA6F, description = "Chess Symbols" }, ["cjkcompatibility"] = { first = 0x03300, last = 0x033FF, otf="hang", description = "CJK Compatibility" }, ["cjkcompatibilityforms"] = { first = 0x0FE30, last = 0x0FE4F, otf="hang", description = "CJK Compatibility Forms" }, ["cjkcompatibilityideographs"] = { first = 0x0F900, last = 0x0FAFF, otf="hang", description = "CJK Compatibility Ideographs" }, @@ -296,6 +297,7 @@ local blocks = allocate { -- ["digitsthai"] = { first = 0x00E50, last = 0x00E59, math = true }, -- ["digitstibetan"] = { first = 0x00F20, last = 0x00F29, math = true }, ["dingbats"] = { first = 0x02700, last = 0x027BF, description = "Dingbats" }, + ["dogra"] = { first = 0x11800, last = 0x1184F, description = "Dogra" }, ["dominotiles"] = { first = 0x1F030, last = 0x1F09F, description = "Domino Tiles" }, ["duployan"] = { first = 0x1BC00, last = 0x1BC9F, description = "Duployan" }, ["earlydynasticcuneiform"] = { first = 0x12480, last = 0x1254F, description = "Early Dynastic Cuneiform" }, @@ -314,6 +316,7 @@ local blocks = allocate { ["geometricshapes"] = { first = 0x025A0, last = 0x025FF, math = true, description = "Geometric Shapes" }, ["geometricshapesextended"] = { first = 0x1F780, last = 0x1F7FF, description = "Geometric Shapes Extended" }, ["georgian"] = { first = 0x010A0, last = 0x010FF, otf="geor", description = "Georgian" }, + ["georgianextended"] = { first = 0x01C90, last = 0x01CBF, description = "Georgian Extended" }, ["georgiansupplement"] = { first = 0x02D00, last = 0x02D2F, otf="geor", description = "Georgian Supplement" }, ["glagolitic"] = { first = 0x02C00, last = 0x02C5F, otf="glag", description = "Glagolitic" }, ["glagoliticsupplement"] = { first = 0x1E000, last = 0x1E02F, description = "Glagolitic Supplement" }, @@ -322,6 +325,7 @@ local blocks = allocate { ["greekandcoptic"] = { first = 0x00370, last = 0x003FF, otf="grek", description = "Greek and Coptic" }, ["greekextended"] = { first = 0x01F00, last = 0x01FFF, otf="grek", description = "Greek Extended" }, ["gujarati"] = { first = 0x00A80, last = 0x00AFF, otf="gujr", description = "Gujarati" }, + ["gunjalagondi"] = { first = 0x11D60, last = 0x11DAF, description = "Gunjala Gondi" }, ["gurmukhi"] = { first = 0x00A00, last = 0x00A7F, otf="guru", description = "Gurmukhi" }, ["halfwidthandfullwidthforms"] = { first = 0x0FF00, last = 0x0FFEF, description = "Halfwidth and Fullwidth Forms" }, ["hangulcompatibilityjamo"] = { first = 0x03130, last = 0x0318F, otf="jamo", description = "Hangul Compatibility Jamo" }, @@ -329,6 +333,7 @@ local blocks = allocate { ["hanguljamoextendeda"] = { first = 0x0A960, last = 0x0A97F, description = "Hangul Jamo Extended-A" }, ["hanguljamoextendedb"] = { first = 0x0D7B0, last = 0x0D7FF, description = "Hangul Jamo Extended-B" }, ["hangulsyllables"] = { first = 0x0AC00, last = 0x0D7AF, otf="hang", description = "Hangul Syllables" }, + ["hanifirohingya"] = { first = 0x10D00, last = 0x10D3F, description = "Hanifi Rohingya" }, ["hanunoo"] = { first = 0x01720, last = 0x0173F, otf="hano", description = "Hanunoo" }, ["hatran"] = { first = 0x108E0, last = 0x108FF, description = "Hatran" }, ["hebrew"] = { first = 0x00590, last = 0x005FF, otf="hebr", description = "Hebrew" }, @@ -338,6 +343,7 @@ local blocks = allocate { ["ideographicdescriptioncharacters"] = { first = 0x02FF0, last = 0x02FFF, description = "Ideographic Description Characters" }, ["ideographicsymbolsandpunctuation"] = { first = 0x16FE0, last = 0x16FFF, description = "Ideographic Symbols and Punctuation" }, ["imperialaramaic"] = { first = 0x10840, last = 0x1085F, description = "Imperial Aramaic" }, + ["indicsiyaqnumbers"] = { first = 0x1EC70, last = 0x1ECBF, description = "Indic Siyaq Numbers" }, ["inscriptionalpahlavi"] = { first = 0x10B60, last = 0x10B7F, description = "Inscriptional Pahlavi" }, ["inscriptionalparthian"] = { first = 0x10B40, last = 0x10B5F, description = "Inscriptional Parthian" }, ["ipaextensions"] = { first = 0x00250, last = 0x002AF, description = "IPA Extensions" }, @@ -396,6 +402,7 @@ local blocks = allocate { ["lydian"] = { first = 0x10920, last = 0x1093F, description = "Lydian" }, ["mahajani"] = { first = 0x11150, last = 0x1117F, description = "Mahajani" }, ["mahjongtiles"] = { first = 0x1F000, last = 0x1F02F, description = "Mahjong Tiles" }, + ["makasar"] = { first = 0x11EE0, last = 0x11EFF, description = "Makasar" }, ["malayalam"] = { first = 0x00D00, last = 0x00D7F, otf="mlym", description = "Malayalam" }, ["mandaic"] = { first = 0x00840, last = 0x0085F, otf="mand", description = "Mandaic" }, ["manichaean"] = { first = 0x10AC0, last = 0x10AFF, description = "Manichaean" }, @@ -403,6 +410,8 @@ local blocks = allocate { ["masaramgondi"] = { first = 0x11D00, last = 0x11D5F, description = "Masaram Gondi" }, ["mathematicalalphanumericsymbols"] = { first = 0x1D400, last = 0x1D7FF, math = true, description = "Mathematical Alphanumeric Symbols" }, ["mathematicaloperators"] = { first = 0x02200, last = 0x022FF, math = true, description = "Mathematical Operators" }, + ["mayannumerals"] = { first = 0x1D2E0, last = 0x1D2FF, description = "Mayan Numerals" }, + ["medefaidrin"] = { first = 0x16E40, last = 0x16E9F, description = "Medefaidrin" }, ["meeteimayek"] = { first = 0x0ABC0, last = 0x0ABFF, description = "Meetei Mayek" }, ["meeteimayekextensions"] = { first = 0x0AAE0, last = 0x0AAFF, description = "Meetei Mayek Extensions" }, ["mendekikakui"] = { first = 0x1E800, last = 0x1E8DF, description = "Mende Kikakui" }, @@ -438,6 +447,7 @@ local blocks = allocate { ["oldnortharabian"] = { first = 0x10A80, last = 0x10A9F, description = "Old North Arabian" }, ["oldpermic"] = { first = 0x10350, last = 0x1037F, description = "Old Permic" }, ["oldpersian"] = { first = 0x103A0, last = 0x103DF, otf="xpeo", description = "Old Persian" }, + ["oldsogdian"] = { first = 0x10F00, last = 0x10F2F, description = "Old Sogdian" }, ["oldsoutharabian"] = { first = 0x10A60, last = 0x10A7F, description = "Old South Arabian" }, ["oldturkic"] = { first = 0x10C00, last = 0x10C4F, description = "Old Turkic" }, ["opticalcharacterrecognition"] = { first = 0x02440, last = 0x0245F, description = "Optical Character Recognition" }, @@ -468,6 +478,7 @@ local blocks = allocate { ["sinhala"] = { first = 0x00D80, last = 0x00DFF, otf="sinh", description = "Sinhala" }, ["sinhalaarchaicnumbers"] = { first = 0x111E0, last = 0x111FF, description = "Sinhala Archaic Numbers" }, ["smallformvariants"] = { first = 0x0FE50, last = 0x0FE6F, description = "Small Form Variants" }, + ["sogdian"] = { first = 0x10F30, last = 0x10F6F, description = "Sogdian" }, ["sorasompeng"] = { first = 0x110D0, last = 0x110FF, description = "Sora Sompeng" }, ["soyombo"] = { first = 0x11A50, last = 0x11AAF, description = "Soyombo" }, ["spacingmodifierletters"] = { first = 0x002B0, last = 0x002FF, description = "Spacing Modifier Letters" }, @@ -606,7 +617,8 @@ characters.otfscripts = otfscripts setmetatableindex(otfscripts,function(t,unicode) for k, v in next, blocks do - local first, last = v.first, v.last + local first = v.first + local last = v.last if unicode >= first and unicode <= last then local script = v.otf or "dflt" for u=first,last do @@ -631,25 +643,27 @@ function characters.getrange(name,expression) -- used in font fallback definitio name = gsub(name,'"',"0x") -- goodie: tex hex notation local start, stop if expression then - local first, rest = lpegmatch(splitter2,name) - local range = rawget(blocks,lower(gsub(first,"[^a-zA-Z0-9]",""))) - if range then - start = range.first - stop = range.last - local s = loadstring("return 0 " .. rest) - if type(s) == "function" then - local d = s() - if type(d) == "number" then - start = start + d - stop = stop + d - return start, stop, nil + local n = tonumber(name) + if n then + return n, n, nil + else + local first, rest = lpegmatch(splitter2,name) + local range = rawget(blocks,lower(gsub(first,"[^a-zA-Z0-9]",""))) + if range then + local s = loadstring("return 0 " .. rest) + if type(s) == "function" then + local d = s() + if type(d) == "number" then + return range.first + d, range.last + d, nil + end end end end end - start, stop = lpegmatch(splitter1,name) + local start, stop = lpegmatch(splitter1,name) if start and stop then - start, stop = tonumber(start,16) or tonumber(start), tonumber(stop,16) or tonumber(stop) + start = tonumber(start,16) or tonumber(start) + stop = tonumber(stop, 16) or tonumber(stop) if start and stop then return start, stop, nil end @@ -738,6 +752,10 @@ local is_punctuation = allocate ( tohash { "pc","pd","ps","pe","pi","pf","po", } ) +local is_symbol = allocate ( tohash { + "sm", "sc", "sk", "so", +} ) + -- to be redone: store checked characters characters.is_character = is_character @@ -746,6 +764,7 @@ characters.is_command = is_command characters.is_spacing = is_spacing characters.is_mark = is_mark characters.is_punctuation = is_punctuation +characters.is_symbol = is_symbol local mti = function(t,k) if type(k) == "number" then @@ -1039,7 +1058,8 @@ setmetatableindex(specialchars, function(t,u) local c = data[u] local s = c and c.specials if s then - local tt, ttn = { }, 0 + local tt = { } + local ttn = 0 for i=2,#s do local si = s[i] local c = data[si] @@ -1265,9 +1285,11 @@ lpegpatterns.utf8lower = utf8lower -- string lpegpatterns.utf8upper = utf8upper -- string lpegpatterns.utf8shape = utf8shape -- string -function characters.lower (str) return lpegmatch(utf8lower,str) end -function characters.upper (str) return lpegmatch(utf8upper,str) end -function characters.shaped(str) return lpegmatch(utf8shape,str) end +function characters.lower (str) return str and lpegmatch(utf8lower,str) or "" end +function characters.upper (str) return str and lpegmatch(utf8upper,str) or "" end +function characters.shaped(str) return str and lpegmatch(utf8shape,str) or "" end + +lpeg.setutfcasers(characters.lower,characters.upper) -- local str = [[ -- ÀÁÂÃÄÅàáâãäå àáâãäåàáâãäå ÀÁÂÃÄÅÀÁÂÃÄÅ AAAAAAaaaaaa -- cgit v1.2.3