summary refs log tree commit diff
path: root/tex/context/base/char-ini.lua
diff options
context:
space:
mode:
Diffstat (limited to 'tex/context/base/char-ini.lua')
-rw-r--r-- tex/context/base/char-ini.lua 106
1 files changed, 101 insertions, 5 deletions
diff --git a/tex/context/base/char-ini.lua b/tex/context/base/char-ini.lua
index 7dc381031..33b1550af 100644
--- a/tex/context/base/char-ini.lua
+++ b/tex/context/base/char-ini.lua
@@ -145,6 +145,7 @@ local blocks = allocate {
["bamum"] = { first = 0x0A6A0, last = 0x0A6FF, description = "Bamum" },
["bamumsupplement"] = { first = 0x16800, last = 0x16A3F, description = "Bamum Supplement" },
["basiclatin"] = { first = 0x00000, last = 0x0007F, otf="latn", description = "Basic Latin" },
+ ["bassavah"] = { first = 0x16AD0, last = 0x16AFF, description = "Bassa Vah" },
["batak"] = { first = 0x01BC0, last = 0x01BFF, description = "Batak" },
["bengali"] = { first = 0x00980, last = 0x009FF, otf="beng", description = "Bengali" },
["blockelements"] = { first = 0x02580, last = 0x0259F, otf="bopo", description = "Block Elements" },
@@ -156,8 +157,10 @@ local blocks = allocate {
["buginese"] = { first = 0x01A00, last = 0x01A1F, otf="bugi", description = "Buginese" },
["buhid"] = { first = 0x01740, last = 0x0175F, otf="buhd", description = "Buhid" },
["byzantinemusicalsymbols"] = { first = 0x1D000, last = 0x1D0FF, otf="byzm", description = "Byzantine Musical Symbols" },
+ ["caucasianalbanian"] = { first = 0x10530, last = 0x1056F, description = "Caucasian Albanian" },
["commonindicnumberforms"] = { first = 0x0A830, last = 0x0A83F, description = "Common Indic Number Forms" },
["carian"] = { first = 0x102A0, last = 0x102DF, description = "Carian" },
+ ["chakma"] = { first = 0x11100, last = 0x1114F, description = "Chakma" },
["cham"] = { first = 0x0AA00, last = 0x0AA5F, description = "Cham" },
["cherokee"] = { first = 0x013A0, last = 0x013FF, otf="cher", description = "Cherokee" },
["cjkcompatibility"] = { first = 0x03300, last = 0x033FF, otf="hang", description = "CJK Compatibility" },
@@ -170,12 +173,16 @@ local blocks = allocate {
["cjkunifiedideographs"] = { first = 0x04E00, last = 0x09FFF, otf="hang", description = "CJK Unified Ideographs", catcode = "letter" },
["cjkunifiedideographsextensiona"] = { first = 0x03400, last = 0x04DBF, otf="hang", description = "CJK Unified Ideographs Extension A" },
["cjkunifiedideographsextensionb"] = { first = 0x20000, last = 0x2A6DF, otf="hang", description = "CJK Unified Ideographs Extension B" },
+ ["cjkunifiedideographsextensionc"] = { first = 0x2A700, last = 0x2B73F, description = "CJK Unified Ideographs Extension C" },
+ ["cjkunifiedideographsextensiond"] = { first = 0x2B740, last = 0x2B81F, description = "CJK Unified Ideographs Extension D" },
["combiningdiacriticalmarks"] = { first = 0x00300, last = 0x0036F, description = "Combining Diacritical Marks" },
+ ["combiningdiacriticalmarksextended"] = { first = 0x01AB0, last = 0x01AFF, description = "Combining Diacritical Marks Extended" },
["combiningdiacriticalmarksforsymbols"] = { first = 0x020D0, last = 0x020FF, description = "Combining Diacritical Marks for Symbols" },
["combiningdiacriticalmarkssupplement"] = { first = 0x01DC0, last = 0x01DFF, description = "Combining Diacritical Marks Supplement" },
["combininghalfmarks"] = { first = 0x0FE20, last = 0x0FE2F, description = "Combining Half Marks" },
["controlpictures"] = { first = 0x02400, last = 0x0243F, description = "Control Pictures" },
["coptic"] = { first = 0x02C80, last = 0x02CFF, otf="copt", description = "Coptic" },
+ ["copticepactnumbers"] = { first = 0x102E0, last = 0x102FF, description = "Coptic Epact Numbers" },
["countingrodnumerals"] = { first = 0x1D360, last = 0x1D37F, description = "Counting Rod Numerals" },
["cuneiform"] = { first = 0x12000, last = 0x123FF, otf="xsux", description = "Cuneiform" },
["cuneiformnumbersandpunctuation"] = { first = 0x12400, last = 0x1247F, otf="xsux", description = "Cuneiform Numbers and Punctuation" },
@@ -190,7 +197,9 @@ local blocks = allocate {
["devanagariextended"] = { first = 0x0A8E0, last = 0x0A8FF, description = "Devanagari Extended" },
["dingbats"] = { first = 0x02700, last = 0x027BF, description = "Dingbats" },
["dominotiles"] = { first = 0x1F030, last = 0x1F09F, description = "Domino Tiles" },
+ ["duployan"] = { first = 0x1BC00, last = 0x1BC9F, description = "Duployan" },
["egyptianhieroglyphs"] = { first = 0x13000, last = 0x1342F, description = "Egyptian Hieroglyphs" },
+ ["elbasan"] = { first = 0x10500, last = 0x1052F, description = "Elbasan" },
["emoticons"] = { first = 0x1F600, last = 0x1F64F, description = "Emoticons" },
["enclosedalphanumericsupplement"] = { first = 0x1F100, last = 0x1F1FF, description = "Enclosed Alphanumeric Supplement" },
["enclosedalphanumerics"] = { first = 0x02460, last = 0x024FF, description = "Enclosed Alphanumerics" },
@@ -202,10 +211,12 @@ local blocks = allocate {
["ethiopicsupplement"] = { first = 0x01380, last = 0x0139F, otf="ethi", description = "Ethiopic Supplement" },
["generalpunctuation"] = { first = 0x02000, last = 0x0206F, description = "General Punctuation" },
["geometricshapes"] = { first = 0x025A0, last = 0x025FF, description = "Geometric Shapes" },
+ ["geometricshapesextended"] = { first = 0x1F780, last = 0x1F7FF, description = "Geometric Shapes Extended" },
["georgian"] = { first = 0x010A0, last = 0x010FF, otf="geor", description = "Georgian" },
["georgiansupplement"] = { first = 0x02D00, last = 0x02D2F, otf="geor", description = "Georgian Supplement" },
["glagolitic"] = { first = 0x02C00, last = 0x02C5F, otf="glag", description = "Glagolitic" },
["gothic"] = { first = 0x10330, last = 0x1034F, otf="goth", description = "Gothic" },
+ ["grantha"] = { first = 0x11300, last = 0x1137F, description = "Grantha" },
["greekandcoptic"] = { first = 0x00370, last = 0x003FF, otf="grek", description = "Greek and Coptic" },
["greekextended"] = { first = 0x01F00, last = 0x01FFF, otf="grek", description = "Greek Extended" },
["gujarati"] = { first = 0x00A80, last = 0x00AFF, otf="gujr", description = "Gujarati" },
@@ -238,29 +249,36 @@ local blocks = allocate {
["kharoshthi"] = { first = 0x10A00, last = 0x10A5F, otf="khar", description = "Kharoshthi" },
["khmer"] = { first = 0x01780, last = 0x017FF, otf="khmr", description = "Khmer" },
["khmersymbols"] = { first = 0x019E0, last = 0x019FF, otf="khmr", description = "Khmer Symbols" },
+ ["khojki"] = { first = 0x11200, last = 0x1124F, description = "Khojki" },
+ ["khudawadi"] = { first = 0x112B0, last = 0x112FF, description = "Khudawadi" },
["lao"] = { first = 0x00E80, last = 0x00EFF, otf="lao", description = "Lao" },
["latinextendeda"] = { first = 0x00100, last = 0x0017F, otf="latn", description = "Latin Extended-A" },
["latinextendedadditional"] = { first = 0x01E00, last = 0x01EFF, otf="latn", description = "Latin Extended Additional" },
["latinextendedb"] = { first = 0x00180, last = 0x0024F, otf="latn", description = "Latin Extended-B" },
["latinextendedc"] = { first = 0x02C60, last = 0x02C7F, otf="latn", description = "Latin Extended-C" },
["latinextendedd"] = { first = 0x0A720, last = 0x0A7FF, otf="latn", description = "Latin Extended-D" },
+ ["latinextendede"] = { first = 0x0AB30, last = 0x0AB6F, description = "Latin Extended-E" },
["latinsupplement"] = { first = 0x00080, last = 0x000FF, otf="latn", description = "Latin-1 Supplement" },
["lepcha"] = { first = 0x01C00, last = 0x01C4F, description = "Lepcha" },
["letterlikesymbols"] = { first = 0x02100, last = 0x0214F, description = "Letterlike Symbols" },
["limbu"] = { first = 0x01900, last = 0x0194F, otf="limb", description = "Limbu" },
+ ["lineara"] = { first = 0x10600, last = 0x1077F, description = "Linear A" },
["linearbideograms"] = { first = 0x10080, last = 0x100FF, otf="linb", description = "Linear B Ideograms" },
["linearbsyllabary"] = { first = 0x10000, last = 0x1007F, otf="linb", description = "Linear B Syllabary" },
["lisu"] = { first = 0x0A4D0, last = 0x0A4FF, description = "Lisu" },
["lowsurrogates"] = { first = 0x0DC00, last = 0x0DFFF, description = "Low Surrogates" },
["lycian"] = { first = 0x10280, last = 0x1029F, description = "Lycian" },
["lydian"] = { first = 0x10920, last = 0x1093F, description = "Lydian" },
+ ["mahajani"] = { first = 0x11150, last = 0x1117F, description = "Mahajani" },
["mahjongtiles"] = { first = 0x1F000, last = 0x1F02F, description = "Mahjong Tiles" },
["malayalam"] = { first = 0x00D00, last = 0x00D7F, otf="mlym", description = "Malayalam" },
- ["mandiac"] = { first = 0x00840, last = 0x0085F, otf="mand", description = "Mandaic" },
+ ["mandaic"] = { first = 0x00840, last = 0x0085F, otf="mand", description = "Mandaic" },
+ ["manichaean"] = { first = 0x10AC0, last = 0x10AFF, description = "Manichaean" },
["mathematicalalphanumericsymbols"] = { first = 0x1D400, last = 0x1D7FF, description = "Mathematical Alphanumeric Symbols" },
["mathematicaloperators"] = { first = 0x02200, last = 0x022FF, description = "Mathematical Operators" },
["meeteimayek"] = { first = 0x0ABC0, last = 0x0ABFF, description = "Meetei Mayek" },
["meeteimayekextensions"] = { first = 0x0AAE0, last = 0x0AAFF, description = "Meetei Mayek Extensions" },
+ ["mendekikakui"] = { first = 0x1E800, last = 0x1E8DF, description = "Mende Kikakui" },
["meroiticcursive"] = { first = 0x109A0, last = 0x109FF, description = "Meroitic Cursive" },
["meroitichieroglyphs"] = { first = 0x10980, last = 0x1099F, description = "Meroitic Hieroglyphs" },
["miao"] = { first = 0x16F00, last = 0x16F9F, description = "Miao" },
@@ -268,25 +286,35 @@ local blocks = allocate {
["miscellaneousmathematicalsymbolsb"] = { first = 0x02980, last = 0x029FF, description = "Miscellaneous Mathematical Symbols-B" },
["miscellaneoussymbols"] = { first = 0x02600, last = 0x026FF, description = "Miscellaneous Symbols" },
["miscellaneoussymbolsandarrows"] = { first = 0x02B00, last = 0x02BFF, description = "Miscellaneous Symbols and Arrows" },
- ["miscellaneoussymbolsandpictographs"] = { first = 0x1F300, last = 0x1F5FF, description = "Miscellaneous Symbols And Pictographs" },
+ ["miscellaneoussymbolsandpictographs"] = { first = 0x1F300, last = 0x1F5FF, description = "Miscellaneous Symbols and Pictographs" },
["miscellaneoustechnical"] = { first = 0x02300, last = 0x023FF, description = "Miscellaneous Technical" },
+ ["modi"] = { first = 0x11600, last = 0x1165F, description = "Modi" },
["modifiertoneletters"] = { first = 0x0A700, last = 0x0A71F, description = "Modifier Tone Letters" },
["mongolian"] = { first = 0x01800, last = 0x018AF, otf="mong", description = "Mongolian" },
+ ["mro"] = { first = 0x16A40, last = 0x16A6F, description = "Mro" },
["musicalsymbols"] = { first = 0x1D100, last = 0x1D1FF, otf="musc", description = "Musical Symbols" },
["myanmar"] = { first = 0x01000, last = 0x0109F, otf="mymr", description = "Myanmar" },
["myanmarextendeda"] = { first = 0x0AA60, last = 0x0AA7F, description = "Myanmar Extended-A" },
+ ["myanmarextendedb"] = { first = 0x0A9E0, last = 0x0A9FF, description = "Myanmar Extended-B" },
+ ["nabataean"] = { first = 0x10880, last = 0x108AF, description = "Nabataean" },
["newtailue"] = { first = 0x01980, last = 0x019DF, description = "New Tai Lue" },
["nko"] = { first = 0x007C0, last = 0x007FF, otf="nko", description = "NKo" },
["numberforms"] = { first = 0x02150, last = 0x0218F, description = "Number Forms" },
["ogham"] = { first = 0x01680, last = 0x0169F, otf="ogam", description = "Ogham" },
["olchiki"] = { first = 0x01C50, last = 0x01C7F, description = "Ol Chiki" },
["olditalic"] = { first = 0x10300, last = 0x1032F, otf="ital", description = "Old Italic" },
+ ["oldnortharabian"] = { first = 0x10A80, last = 0x10A9F, description = "Old North Arabian" },
+ ["oldpermic"] = { first = 0x10350, last = 0x1037F, description = "Old Permic" },
["oldpersian"] = { first = 0x103A0, last = 0x103DF, otf="xpeo", description = "Old Persian" },
["oldsoutharabian"] = { first = 0x10A60, last = 0x10A7F, description = "Old South Arabian" },
- ["odlturkic"] = { first = 0x10C00, last = 0x10C4F, description = "Old Turkic" },
+ ["oldturkic"] = { first = 0x10C00, last = 0x10C4F, description = "Old Turkic" },
["opticalcharacterrecognition"] = { first = 0x02440, last = 0x0245F, description = "Optical Character Recognition" },
["oriya"] = { first = 0x00B00, last = 0x00B7F, otf="orya", description = "Oriya" },
+ ["ornamentaldingbats"] = { first = 0x1F650, last = 0x1F67F, description = "Ornamental Dingbats" },
["osmanya"] = { first = 0x10480, last = 0x104AF, otf="osma", description = "Osmanya" },
+ ["pahawhhmong"] = { first = 0x16B00, last = 0x16B8F, description = "Pahawh Hmong" },
+ ["palmyrene"] = { first = 0x10860, last = 0x1087F, description = "Palmyrene" },
+ ["paucinhau"] = { first = 0x11AC0, last = 0x11AFF, description = "Pau Cin Hau" },
["phagspa"] = { first = 0x0A840, last = 0x0A87F, otf="phag", description = "Phags-pa" },
["phaistosdisc"] = { first = 0x101D0, last = 0x101FF, description = "Phaistos Disc" },
["phoenician"] = { first = 0x10900, last = 0x1091F, otf="phnx", description = "Phoenician" },
@@ -294,6 +322,7 @@ local blocks = allocate {
["phoneticextensionssupplement"] = { first = 0x01D80, last = 0x01DBF, description = "Phonetic Extensions Supplement" },
["playingcards"] = { first = 0x1F0A0, last = 0x1F0FF, description = "Playing Cards" },
["privateusearea"] = { first = 0x0E000, last = 0x0F8FF, description = "Private Use Area" },
+ ["psalterpahlavi"] = { first = 0x10B80, last = 0x10BAF, description = "Psalter Pahlavi" },
["rejang"] = { first = 0x0A930, last = 0x0A95F, description = "Rejang" },
["ruminumeralsymbols"] = { first = 0x10E60, last = 0x10E7F, description = "Rumi Numeral Symbols" },
["runic"] = { first = 0x016A0, last = 0x016FF, otf="runr", description = "Runic" },
@@ -301,7 +330,10 @@ local blocks = allocate {
["saurashtra"] = { first = 0x0A880, last = 0x0A8DF, description = "Saurashtra" },
["sharada"] = { first = 0x11180, last = 0x111DF, description = "Sharada" },
["shavian"] = { first = 0x10450, last = 0x1047F, otf="shaw", description = "Shavian" },
+ ["shorthandformatcontrols"] = { first = 0x1BCA0, last = 0x1BCAF, description = "Shorthand Format Controls" },
+ ["siddham"] = { first = 0x11580, last = 0x115FF, description = "Siddham" },
["sinhala"] = { first = 0x00D80, last = 0x00DFF, otf="sinh", description = "Sinhala" },
+ ["sinhalaarchaicnumbers"] = { first = 0x111E0, last = 0x111FF, description = "Sinhala Archaic Numbers" },
["smallformvariants"] = { first = 0x0FE50, last = 0x0FE6F, description = "Small Form Variants" },
["sorasompeng"] = { first = 0x110D0, last = 0x110FF, description = "Sora Sompeng" },
["spacingmodifierletters"] = { first = 0x002B0, last = 0x002FF, description = "Spacing Modifier Letters" },
@@ -311,6 +343,7 @@ local blocks = allocate {
["superscriptsandsubscripts"] = { first = 0x02070, last = 0x0209F, description = "Superscripts and Subscripts" },
["supplementalarrowsa"] = { first = 0x027F0, last = 0x027FF, description = "Supplemental Arrows-A" },
["supplementalarrowsb"] = { first = 0x02900, last = 0x0297F, description = "Supplemental Arrows-B" },
+ ["supplementalarrowsc"] = { first = 0x1F800, last = 0x1F8FF, description = "Supplemental Arrows-C" },
["supplementalmathematicaloperators"] = { first = 0x02A00, last = 0x02AFF, description = "Supplemental Mathematical Operators" },
["supplementalpunctuation"] = { first = 0x02E00, last = 0x02E7F, description = "Supplemental Punctuation" },
["supplementaryprivateuseareaa"] = { first = 0xF0000, last = 0xFFFFF, description = "Supplementary Private Use Area-A" },
@@ -331,7 +364,8 @@ local blocks = allocate {
["thai"] = { first = 0x00E00, last = 0x00E7F, otf="thai", description = "Thai" },
["tibetan"] = { first = 0x00F00, last = 0x00FFF, otf="tibt", description = "Tibetan" },
["tifinagh"] = { first = 0x02D30, last = 0x02D7F, otf="tfng", description = "Tifinagh" },
- ["transportandmapsymbols"] = { first = 0x1F680, last = 0x1F6FF, description = "Transport And Map Symbols" },
+ ["tirhuta"] = { first = 0x11480, last = 0x114DF, description = "Tirhuta" },
+ ["transportandmapsymbols"] = { first = 0x1F680, last = 0x1F6FF, description = "Transport and Map Symbols" },
["ugaritic"] = { first = 0x10380, last = 0x1039F, otf="ugar", description = "Ugaritic" },
["unifiedcanadianaboriginalsyllabics"] = { first = 0x01400, last = 0x0167F, otf="cans", description = "Unified Canadian Aboriginal Syllabics" },
["unifiedcanadianaboriginalsyllabicsextended"] = { first = 0x018B0, last = 0x018FF, description = "Unified Canadian Aboriginal Syllabics Extended" },
@@ -340,6 +374,7 @@ local blocks = allocate {
["variationselectorssupplement"] = { first = 0xE0100, last = 0xE01EF, description = "Variation Selectors Supplement" },
["vedicextensions"] = { first = 0x01CD0, last = 0x01CFF, description = "Vedic Extensions" },
["verticalforms"] = { first = 0x0FE10, last = 0x0FE1F, description = "Vertical Forms" },
+ ["warangciti"] = { first = 0x118A0, last = 0x118FF, description = "Warang Citi" },
["yijinghexagramsymbols"] = { first = 0x04DC0, last = 0x04DFF, otf="yi", description = "Yijing Hexagram Symbols" },
["yiradicals"] = { first = 0x0A490, last = 0x0A4CF, otf="yi", description = "Yi Radicals" },
["yisyllables"] = { first = 0x0A000, last = 0x0A48F, otf="yi", description = "Yi Syllables" },
@@ -492,7 +527,68 @@ setmetatableindex(characters.is_punctuation,mti)
-- linebreak: todo: hash
--
-- normative : BK CR LF CM SG GL CB SP ZW NL WJ JL JV JT H2 H3
--- informative : XX OP CL QU NS EX SY IS PR PO NU AL ID IN HY BB BA SA AI B2 new:CP
+-- informative : XX OP CL CP QU NS EX SY IS PR PO NU AL ID IN HY BB BA SA AI B2 HL CJ RI
+--
+-- comments taken from standard:
+
+characters.linebreaks = { -- lowercase two-letter line break class tag -> descriptive name; trailing comments read "members : behaviour"
+
+ -- non-tailorable line breaking classes
+
+ ["bk"] = "mandatory break", -- nl, ps : cause a line break (after)
+ ["cr"] = "carriage return", -- cr : cause a line break (after), except between cr and lf
+ ["lf"] = "line feed", -- lf : cause a line break (after)
+ ["cm"] = "combining mark", -- combining marks, control codes : prohibit a line break between the character and the preceding character
+ ["nl"] = "next line", -- nel : cause a line break (after)
+ ["sg"] = "surrogate", -- surrogates : do not occur in well-formed text
+ ["wj"] = "word joiner", -- wj : prohibit line breaks before and after
+ ["zw"] = "zero width space", -- zwsp : provide a break opportunity
+ ["gl"] = "non-breaking (glue)", -- cgj, nbsp, zwnbsp : prohibit line breaks before and after
+ ["sp"] = "space", -- space : enable indirect line breaks
+
+ -- break opportunities
+
+ ["b2"] = "break opportunity before and after", -- em dash : provide a line break opportunity before and after the character
+ ["ba"] = "break after", -- spaces, hyphens : generally provide a line break opportunity after the character
+ ["bb"] = "break before", -- punctuation used in dictionaries : generally provide a line break opportunity before the character
+ ["hy"] = "hyphen", -- hyphen-minus : provide a line break opportunity after the character, except in numeric context
+ ["cb"] = "contingent break opportunity", -- inline objects : provide a line break opportunity contingent on additional information
+
+ -- characters prohibiting certain breaks
+
+ ["cl"] = "close punctuation", -- “}”, “❳”, “⟫” etc. : prohibit line breaks before
+ ["cp"] = "close parenthesis", -- “)”, “]” : prohibit line breaks before
+ ["ex"] = "exclamation/interrogation", -- “!”, “?”, etc. : prohibit line breaks before
+ ["in"] = "inseparable", -- leaders : allow only indirect line breaks between pairs
+ ["ns"] = "nonstarter", -- “‼”, “‽”, “⁇”, “⁉”, etc. : allow only indirect line breaks before
+ ["op"] = "open punctuation", -- “(”, “[”, “{”, etc. : prohibit line breaks after
+ ["qu"] = "quotation", -- quotation marks : act like they are both opening and closing
+
+ -- numeric context
+
+ ["is"] = "infix numeric separator", -- . , : prevent breaks after any and before numeric
+ ["nu"] = "numeric", -- digits : form numeric expressions for line breaking purposes
+ ["po"] = "postfix numeric", -- %, ¢ : do not break following a numeric expression
+ ["pr"] = "prefix numeric", -- $, £, ¥, etc. : do not break in front of a numeric expression
+ ["sy"] = "symbols allowing break after", -- / : prevent a break before, and allow a break after
+
+ -- other characters
+
+ ["ai"] = "ambiguous (alphabetic or ideographic)", -- characters with ambiguous east asian width : act like al when the resolved eaw is n; otherwise, act as id
+ ["al"] = "alphabetic", -- alphabets and regular symbols : are alphabetic characters or symbols that are used with alphabetic characters
+ ["cj"] = "conditional japanese starter", -- small kana : treat as ns or id for strict or normal breaking.
+ ["h2"] = "hangul lv syllable", -- hangul : form korean syllable blocks
+ ["h3"] = "hangul lvt syllable", -- hangul : form korean syllable blocks
+ ["hl"] = "hebrew letter", -- hebrew : do not break around a following hyphen; otherwise act as alphabetic
+ ["id"] = "ideographic", -- ideographs : break before or after, except in some numeric context
+ ["jl"] = "hangul l jamo", -- conjoining jamo : form korean syllable blocks
+ ["jv"] = "hangul v jamo", -- conjoining jamo : form korean syllable blocks
+ ["jt"] = "hangul t jamo", -- conjoining jamo : form korean syllable blocks
+ ["ri"] = "regional indicator", -- regional indicator symbol letter a .. z : keep together, break before and after from others
+ ["sa"] = "complex context dependent (south east asian)", -- south east asian: thai, lao, khmer : provide a line break opportunity contingent on additional, language-specific context analysis
+ ["xx"] = "unknown", -- most unassigned, private-use : have as yet unknown line breaking behavior or unassigned code positions
+
+}
-- east asian width:
--