summary | refs | log | tree | commit | diff
path: root/tex/context/base/mkiv/char-ini.lua
diff options
context:
space:
mode:
Diffstat (limited to 'tex/context/base/mkiv/char-ini.lua')
-rw-r--r-- tex/context/base/mkiv/char-ini.lua 60
1 files changed, 41 insertions, 19 deletions
diff --git a/tex/context/base/mkiv/char-ini.lua b/tex/context/base/mkiv/char-ini.lua
index c308a2c0f..fb9d9f126 100644
--- a/tex/context/base/mkiv/char-ini.lua
+++ b/tex/context/base/mkiv/char-ini.lua
@@ -234,6 +234,7 @@ local blocks = allocate {
["cham"] = { first = 0x0AA00, last = 0x0AA5F, description = "Cham" },
["cherokee"] = { first = 0x013A0, last = 0x013FF, otf="cher", description = "Cherokee" },
["cherokeesupplement"] = { first = 0x0AB70, last = 0x0ABBF, description = "Cherokee Supplement" },
+ ["chesssymbols"] = { first = 0x1FA00, last = 0x1FA6F, description = "Chess Symbols" },
["cjkcompatibility"] = { first = 0x03300, last = 0x033FF, otf="hang", description = "CJK Compatibility" },
["cjkcompatibilityforms"] = { first = 0x0FE30, last = 0x0FE4F, otf="hang", description = "CJK Compatibility Forms" },
["cjkcompatibilityideographs"] = { first = 0x0F900, last = 0x0FAFF, otf="hang", description = "CJK Compatibility Ideographs" },
@@ -296,6 +297,7 @@ local blocks = allocate {
-- ["digitsthai"] = { first = 0x00E50, last = 0x00E59, math = true },
-- ["digitstibetan"] = { first = 0x00F20, last = 0x00F29, math = true },
["dingbats"] = { first = 0x02700, last = 0x027BF, description = "Dingbats" },
+ ["dogra"] = { first = 0x11800, last = 0x1184F, description = "Dogra" },
["dominotiles"] = { first = 0x1F030, last = 0x1F09F, description = "Domino Tiles" },
["duployan"] = { first = 0x1BC00, last = 0x1BC9F, description = "Duployan" },
["earlydynasticcuneiform"] = { first = 0x12480, last = 0x1254F, description = "Early Dynastic Cuneiform" },
@@ -314,6 +316,7 @@ local blocks = allocate {
["geometricshapes"] = { first = 0x025A0, last = 0x025FF, math = true, description = "Geometric Shapes" },
["geometricshapesextended"] = { first = 0x1F780, last = 0x1F7FF, description = "Geometric Shapes Extended" },
["georgian"] = { first = 0x010A0, last = 0x010FF, otf="geor", description = "Georgian" },
+ ["georgianextended"] = { first = 0x01C90, last = 0x01CBF, description = "Georgian Extended" },
["georgiansupplement"] = { first = 0x02D00, last = 0x02D2F, otf="geor", description = "Georgian Supplement" },
["glagolitic"] = { first = 0x02C00, last = 0x02C5F, otf="glag", description = "Glagolitic" },
["glagoliticsupplement"] = { first = 0x1E000, last = 0x1E02F, description = "Glagolitic Supplement" },
@@ -322,6 +325,7 @@ local blocks = allocate {
["greekandcoptic"] = { first = 0x00370, last = 0x003FF, otf="grek", description = "Greek and Coptic" },
["greekextended"] = { first = 0x01F00, last = 0x01FFF, otf="grek", description = "Greek Extended" },
["gujarati"] = { first = 0x00A80, last = 0x00AFF, otf="gujr", description = "Gujarati" },
+ ["gunjalagondi"] = { first = 0x11D60, last = 0x11DAF, description = "Gunjala Gondi" },
["gurmukhi"] = { first = 0x00A00, last = 0x00A7F, otf="guru", description = "Gurmukhi" },
["halfwidthandfullwidthforms"] = { first = 0x0FF00, last = 0x0FFEF, description = "Halfwidth and Fullwidth Forms" },
["hangulcompatibilityjamo"] = { first = 0x03130, last = 0x0318F, otf="jamo", description = "Hangul Compatibility Jamo" },
@@ -329,6 +333,7 @@ local blocks = allocate {
["hanguljamoextendeda"] = { first = 0x0A960, last = 0x0A97F, description = "Hangul Jamo Extended-A" },
["hanguljamoextendedb"] = { first = 0x0D7B0, last = 0x0D7FF, description = "Hangul Jamo Extended-B" },
["hangulsyllables"] = { first = 0x0AC00, last = 0x0D7AF, otf="hang", description = "Hangul Syllables" },
+ ["hanifirohingya"] = { first = 0x10D00, last = 0x10D3F, description = "Hanifi Rohingya" },
["hanunoo"] = { first = 0x01720, last = 0x0173F, otf="hano", description = "Hanunoo" },
["hatran"] = { first = 0x108E0, last = 0x108FF, description = "Hatran" },
["hebrew"] = { first = 0x00590, last = 0x005FF, otf="hebr", description = "Hebrew" },
@@ -338,6 +343,7 @@ local blocks = allocate {
["ideographicdescriptioncharacters"] = { first = 0x02FF0, last = 0x02FFF, description = "Ideographic Description Characters" },
["ideographicsymbolsandpunctuation"] = { first = 0x16FE0, last = 0x16FFF, description = "Ideographic Symbols and Punctuation" },
["imperialaramaic"] = { first = 0x10840, last = 0x1085F, description = "Imperial Aramaic" },
+ ["indicsiyaqnumbers"] = { first = 0x1EC70, last = 0x1ECBF, description = "Indic Siyaq Numbers" },
["inscriptionalpahlavi"] = { first = 0x10B60, last = 0x10B7F, description = "Inscriptional Pahlavi" },
["inscriptionalparthian"] = { first = 0x10B40, last = 0x10B5F, description = "Inscriptional Parthian" },
["ipaextensions"] = { first = 0x00250, last = 0x002AF, description = "IPA Extensions" },
@@ -396,6 +402,7 @@ local blocks = allocate {
["lydian"] = { first = 0x10920, last = 0x1093F, description = "Lydian" },
["mahajani"] = { first = 0x11150, last = 0x1117F, description = "Mahajani" },
["mahjongtiles"] = { first = 0x1F000, last = 0x1F02F, description = "Mahjong Tiles" },
+ ["makasar"] = { first = 0x11EE0, last = 0x11EFF, description = "Makasar" },
["malayalam"] = { first = 0x00D00, last = 0x00D7F, otf="mlym", description = "Malayalam" },
["mandaic"] = { first = 0x00840, last = 0x0085F, otf="mand", description = "Mandaic" },
["manichaean"] = { first = 0x10AC0, last = 0x10AFF, description = "Manichaean" },
@@ -403,6 +410,8 @@ local blocks = allocate {
["masaramgondi"] = { first = 0x11D00, last = 0x11D5F, description = "Masaram Gondi" },
["mathematicalalphanumericsymbols"] = { first = 0x1D400, last = 0x1D7FF, math = true, description = "Mathematical Alphanumeric Symbols" },
["mathematicaloperators"] = { first = 0x02200, last = 0x022FF, math = true, description = "Mathematical Operators" },
+ ["mayannumerals"] = { first = 0x1D2E0, last = 0x1D2FF, description = "Mayan Numerals" },
+ ["medefaidrin"] = { first = 0x16E40, last = 0x16E9F, description = "Medefaidrin" },
["meeteimayek"] = { first = 0x0ABC0, last = 0x0ABFF, description = "Meetei Mayek" },
["meeteimayekextensions"] = { first = 0x0AAE0, last = 0x0AAFF, description = "Meetei Mayek Extensions" },
["mendekikakui"] = { first = 0x1E800, last = 0x1E8DF, description = "Mende Kikakui" },
@@ -438,6 +447,7 @@ local blocks = allocate {
["oldnortharabian"] = { first = 0x10A80, last = 0x10A9F, description = "Old North Arabian" },
["oldpermic"] = { first = 0x10350, last = 0x1037F, description = "Old Permic" },
["oldpersian"] = { first = 0x103A0, last = 0x103DF, otf="xpeo", description = "Old Persian" },
+ ["oldsogdian"] = { first = 0x10F00, last = 0x10F2F, description = "Old Sogdian" },
["oldsoutharabian"] = { first = 0x10A60, last = 0x10A7F, description = "Old South Arabian" },
["oldturkic"] = { first = 0x10C00, last = 0x10C4F, description = "Old Turkic" },
["opticalcharacterrecognition"] = { first = 0x02440, last = 0x0245F, description = "Optical Character Recognition" },
@@ -468,6 +478,7 @@ local blocks = allocate {
["sinhala"] = { first = 0x00D80, last = 0x00DFF, otf="sinh", description = "Sinhala" },
["sinhalaarchaicnumbers"] = { first = 0x111E0, last = 0x111FF, description = "Sinhala Archaic Numbers" },
["smallformvariants"] = { first = 0x0FE50, last = 0x0FE6F, description = "Small Form Variants" },
+ ["sogdian"] = { first = 0x10F30, last = 0x10F6F, description = "Sogdian" },
["sorasompeng"] = { first = 0x110D0, last = 0x110FF, description = "Sora Sompeng" },
["soyombo"] = { first = 0x11A50, last = 0x11AAF, description = "Soyombo" },
["spacingmodifierletters"] = { first = 0x002B0, last = 0x002FF, description = "Spacing Modifier Letters" },
@@ -606,7 +617,8 @@ characters.otfscripts = otfscripts
setmetatableindex(otfscripts,function(t,unicode)
for k, v in next, blocks do
- local first, last = v.first, v.last
+ local first = v.first
+ local last = v.last
if unicode >= first and unicode <= last then
local script = v.otf or "dflt"
for u=first,last do
@@ -631,25 +643,27 @@ function characters.getrange(name,expression) -- used in font fallback definitio
name = gsub(name,'"',"0x") -- goodie: tex hex notation
local start, stop
if expression then
- local first, rest = lpegmatch(splitter2,name)
- local range = rawget(blocks,lower(gsub(first,"[^a-zA-Z0-9]","")))
- if range then
- start = range.first
- stop = range.last
- local s = loadstring("return 0 " .. rest)
- if type(s) == "function" then
- local d = s()
- if type(d) == "number" then
- start = start + d
- stop = stop + d
- return start, stop, nil
+ local n = tonumber(name)
+ if n then
+ return n, n, nil
+ else
+ local first, rest = lpegmatch(splitter2,name)
+ local range = rawget(blocks,lower(gsub(first,"[^a-zA-Z0-9]","")))
+ if range then
+ local s = loadstring("return 0 " .. rest)
+ if type(s) == "function" then
+ local d = s()
+ if type(d) == "number" then
+ return range.first + d, range.last + d, nil
+ end
end
end
end
end
- start, stop = lpegmatch(splitter1,name)
+ local start, stop = lpegmatch(splitter1,name)
if start and stop then
- start, stop = tonumber(start,16) or tonumber(start), tonumber(stop,16) or tonumber(stop)
+ start = tonumber(start,16) or tonumber(start)
+ stop = tonumber(stop, 16) or tonumber(stop)
if start and stop then
return start, stop, nil
end
@@ -738,6 +752,10 @@ local is_punctuation = allocate ( tohash {
"pc","pd","ps","pe","pi","pf","po",
} )
+local is_symbol = allocate ( tohash {
+ "sm", "sc", "sk", "so",
+} )
+
-- to be redone: store checked characters
characters.is_character = is_character
@@ -746,6 +764,7 @@ characters.is_command = is_command
characters.is_spacing = is_spacing
characters.is_mark = is_mark
characters.is_punctuation = is_punctuation
+characters.is_symbol = is_symbol
local mti = function(t,k)
if type(k) == "number" then
@@ -1039,7 +1058,8 @@ setmetatableindex(specialchars, function(t,u)
local c = data[u]
local s = c and c.specials
if s then
- local tt, ttn = { }, 0
+ local tt = { }
+ local ttn = 0
for i=2,#s do
local si = s[i]
local c = data[si]
@@ -1265,9 +1285,11 @@ lpegpatterns.utf8lower = utf8lower -- string
lpegpatterns.utf8upper = utf8upper -- string
lpegpatterns.utf8shape = utf8shape -- string
-function characters.lower (str) return lpegmatch(utf8lower,str) end
-function characters.upper (str) return lpegmatch(utf8upper,str) end
-function characters.shaped(str) return lpegmatch(utf8shape,str) end
+function characters.lower (str) return str and lpegmatch(utf8lower,str) or "" end
+function characters.upper (str) return str and lpegmatch(utf8upper,str) or "" end
+function characters.shaped(str) return str and lpegmatch(utf8shape,str) or "" end
+
+lpeg.setutfcasers(characters.lower,characters.upper)
-- local str = [[
-- ÀÁÂÃÄÅàáâãäå àáâãäåàáâãäå ÀÁÂÃÄÅÀÁÂÃÄÅ AAAAAAaaaaaa