summaryrefslogtreecommitdiff
path: root/tex/context/base/mkiv/font-phb-imp-library.lua
diff options
context:
space:
mode:
Diffstat (limited to 'tex/context/base/mkiv/font-phb-imp-library.lua')
-rw-r--r--tex/context/base/mkiv/font-phb-imp-library.lua498
1 files changed, 498 insertions, 0 deletions
diff --git a/tex/context/base/mkiv/font-phb-imp-library.lua b/tex/context/base/mkiv/font-phb-imp-library.lua
new file mode 100644
index 000000000..fe991b023
--- /dev/null
+++ b/tex/context/base/mkiv/font-phb-imp-library.lua
@@ -0,0 +1,498 @@
+-- Register this file under its own name in the global modules table (the table is
+-- created here when it does not exist yet). The entry only carries descriptive
+-- metadata: version, origin, author, copyright and license.
+if not modules then modules = { } end modules ['font-phb-imp-library'] = {
+ version = 1.000, -- 2020.01.08,
+ comment = "companion to font-txt.mkiv",
+ original = "derived from a prototype by Kai Eigner",
+ author = "Hans Hagen", -- so don't blame KE
+ copyright = "TAT Zetwerk / PRAGMA ADE / ConTeXt Development Team",
+ license = "see context related readme files",
+}
+
+-- The hb library comes in versions and the one I tested in 2016 was part of the inkscape
+-- suite. In principle one can have incompatibilities due to updates but that is the nature
+-- of a library. When a library is expected one had better use the system version, if only
+-- to make sure that different programs behave the same.
+--
+-- The main reason for testing this approach was that when Idris was working on his fonts,
+-- we wanted to know how different shapers deal with it and the hb command line program
+-- could provide uniscribe output. For the context shaper uniscribe is the reference, also
+-- because Idris started out with Volt a decade ago.
+--
+-- We treat the lib as a black box as it should be. At some point Kai Eigner made an ffi
+-- binding and that one was adapted to the plugin approach of context. It saved me the
+-- trouble of looking at source files to figure it all out. Below is the adapted code.
+--
+-- Keep in mind that this file is for mkiv only. It won't work in lmtx where instead of
+-- ffi we use simple optional libraries with delayed bindings. In principle this mechanism
+-- is generic but because other macropackages follow another route we don't spend time
+-- on that code path here.
+
+local next, tonumber, pcall = next, tonumber, pcall
+local reverse = table.reverse
+local loaddata = io.loaddata
+
+local report = utilities.hb.report or print
+local packtoutf32 = utilities.hb.helpers.packtoutf32
+
+-- Bail out early when this plugin cannot work: it needs the ffi interface, it is
+-- not meant for lmtx, and it only makes sense when running inside context. The
+-- first two cases are reported, the last one quits silently.
+
+if not (FFISUPPORTED and ffi) or (CONTEXTLMTXMODE and CONTEXTLMTXMODE > 0) then
+    report("no ffi support")
+    return
+end
+
+if not context then
+    return
+end
+
+-- The shared library goes by a different name on windows. When it cannot be
+-- loaded we report that and skip the rest of this file.
+
+local libraryname = "libharfbuzz"
+
+if os.name == "windows" then
+    libraryname = "libharfbuzz-0"
+end
+
+local harfbuzz = ffilib(libraryname)
+
+if not harfbuzz then
+    report("no hb library found")
+    return
+end
+
+-- jit.on() : on very long runs (hundreds of pages) it looks faster but
+-- the normal font processor slows down ... this is consistent with
+-- earlier observations that turning it on is often slower on these
+-- one-shot tex runs (also because we don't use many math and/or
+-- string helpers and therefore the faster vm of luajit gives most
+-- benefits, given the patched hasher)
+
+-- Here is Kai's ffi mapping, a bit reorganized. We only define what we
+-- need. I'm happy that Kai did the deciphering of the api that I could
+-- then build upon.
+
+-- C declarations for the part of the hb api that is used in this file: opaque
+-- handles (blob, face, font, buffer), the glyph info and position records, and
+-- the prototypes of the functions that get called further down.
+--
+-- NOTE(review): the hb_script_t enum below lists names without explicit values, so
+-- the ffi numbers them sequentially. The hb headers appear to define these as
+-- four-letter tags instead. This looks harmless because nothing in this file uses
+-- the names; script values only come from hb_script_from_string and are handed
+-- back to hb_buffer_set_script unchanged. Confirm before using the constants.
+--
+-- NOTE(review): hb_buffer_add_utf32 is declared here with a "const char *" text
+-- argument, presumably so that a Lua string holding packed utf32 data can be
+-- passed directly. Its length arguments still count 32 bit units -- verify
+-- against the hb-buffer documentation.
+ffi.cdef [[
+
+typedef struct hb_blob_t hb_blob_t ;
+
+typedef enum {
+ HB_MEMORY_MODE_DUPLICATE,
+ HB_MEMORY_MODE_READONLY,
+ HB_MEMORY_MODE_WRITABLE,
+ HB_MEMORY_MODE_READONLY_MAY_MAKE_WRITABLE
+} hb_memory_mode_t ;
+
+typedef void (*hb_destroy_func_t) (
+ void *user_data
+) ;
+
+typedef struct hb_face_t hb_face_t ;
+
+typedef const struct hb_language_impl_t *hb_language_t ;
+
+typedef struct hb_buffer_t hb_buffer_t ;
+
+typedef enum {
+ HB_SCRIPT_COMMON, HB_SCRIPT_INHERITED, HB_SCRIPT_UNKNOWN,
+
+ HB_SCRIPT_ARABIC, HB_SCRIPT_ARMENIAN, HB_SCRIPT_BENGALI, HB_SCRIPT_CYRILLIC,
+ HB_SCRIPT_DEVANAGARI, HB_SCRIPT_GEORGIAN, HB_SCRIPT_GREEK,
+ HB_SCRIPT_GUJARATI, HB_SCRIPT_GURMUKHI, HB_SCRIPT_HANGUL, HB_SCRIPT_HAN,
+ HB_SCRIPT_HEBREW, HB_SCRIPT_HIRAGANA, HB_SCRIPT_KANNADA, HB_SCRIPT_KATAKANA,
+ HB_SCRIPT_LAO, HB_SCRIPT_LATIN, HB_SCRIPT_MALAYALAM, HB_SCRIPT_ORIYA,
+ HB_SCRIPT_TAMIL, HB_SCRIPT_TELUGU, HB_SCRIPT_THAI, HB_SCRIPT_TIBETAN,
+ HB_SCRIPT_BOPOMOFO, HB_SCRIPT_BRAILLE, HB_SCRIPT_CANADIAN_SYLLABICS,
+ HB_SCRIPT_CHEROKEE, HB_SCRIPT_ETHIOPIC, HB_SCRIPT_KHMER, HB_SCRIPT_MONGOLIAN,
+ HB_SCRIPT_MYANMAR, HB_SCRIPT_OGHAM, HB_SCRIPT_RUNIC, HB_SCRIPT_SINHALA,
+ HB_SCRIPT_SYRIAC, HB_SCRIPT_THAANA, HB_SCRIPT_YI, HB_SCRIPT_DESERET,
+ HB_SCRIPT_GOTHIC, HB_SCRIPT_OLD_ITALIC, HB_SCRIPT_BUHID, HB_SCRIPT_HANUNOO,
+ HB_SCRIPT_TAGALOG, HB_SCRIPT_TAGBANWA, HB_SCRIPT_CYPRIOT, HB_SCRIPT_LIMBU,
+ HB_SCRIPT_LINEAR_B, HB_SCRIPT_OSMANYA, HB_SCRIPT_SHAVIAN, HB_SCRIPT_TAI_LE,
+ HB_SCRIPT_UGARITIC, HB_SCRIPT_BUGINESE, HB_SCRIPT_COPTIC,
+ HB_SCRIPT_GLAGOLITIC, HB_SCRIPT_KHAROSHTHI, HB_SCRIPT_NEW_TAI_LUE,
+ HB_SCRIPT_OLD_PERSIAN, HB_SCRIPT_SYLOTI_NAGRI, HB_SCRIPT_TIFINAGH,
+ HB_SCRIPT_BALINESE, HB_SCRIPT_CUNEIFORM, HB_SCRIPT_NKO, HB_SCRIPT_PHAGS_PA,
+ HB_SCRIPT_PHOENICIAN, HB_SCRIPT_CARIAN, HB_SCRIPT_CHAM, HB_SCRIPT_KAYAH_LI,
+ HB_SCRIPT_LEPCHA, HB_SCRIPT_LYCIAN, HB_SCRIPT_LYDIAN, HB_SCRIPT_OL_CHIKI,
+ HB_SCRIPT_REJANG, HB_SCRIPT_SAURASHTRA, HB_SCRIPT_SUNDANESE, HB_SCRIPT_VAI,
+ HB_SCRIPT_AVESTAN, HB_SCRIPT_BAMUM, HB_SCRIPT_EGYPTIAN_HIEROGLYPHS,
+ HB_SCRIPT_IMPERIAL_ARAMAIC, HB_SCRIPT_INSCRIPTIONAL_PAHLAVI,
+ HB_SCRIPT_INSCRIPTIONAL_PARTHIAN, HB_SCRIPT_JAVANESE, HB_SCRIPT_KAITHI,
+ HB_SCRIPT_LISU, HB_SCRIPT_MEETEI_MAYEK, HB_SCRIPT_OLD_SOUTH_ARABIAN,
+ HB_SCRIPT_OLD_TURKIC, HB_SCRIPT_SAMARITAN, HB_SCRIPT_TAI_THAM,
+ HB_SCRIPT_TAI_VIET, HB_SCRIPT_BATAK, HB_SCRIPT_BRAHMI, HB_SCRIPT_MANDAIC,
+ HB_SCRIPT_CHAKMA, HB_SCRIPT_MEROITIC_CURSIVE, HB_SCRIPT_MEROITIC_HIEROGLYPHS,
+ HB_SCRIPT_MIAO, HB_SCRIPT_SHARADA, HB_SCRIPT_SORA_SOMPENG, HB_SCRIPT_TAKRI,
+ HB_SCRIPT_BASSA_VAH, HB_SCRIPT_CAUCASIAN_ALBANIAN, HB_SCRIPT_DUPLOYAN,
+ HB_SCRIPT_ELBASAN, HB_SCRIPT_GRANTHA, HB_SCRIPT_KHOJKI, HB_SCRIPT_KHUDAWADI,
+ HB_SCRIPT_LINEAR_A, HB_SCRIPT_MAHAJANI, HB_SCRIPT_MANICHAEAN,
+ HB_SCRIPT_MENDE_KIKAKUI, HB_SCRIPT_MODI, HB_SCRIPT_MRO, HB_SCRIPT_NABATAEAN,
+ HB_SCRIPT_OLD_NORTH_ARABIAN, HB_SCRIPT_OLD_PERMIC, HB_SCRIPT_PAHAWH_HMONG,
+ HB_SCRIPT_PALMYRENE, HB_SCRIPT_PAU_CIN_HAU, HB_SCRIPT_PSALTER_PAHLAVI,
+ HB_SCRIPT_SIDDHAM, HB_SCRIPT_TIRHUTA, HB_SCRIPT_WARANG_CITI, HB_SCRIPT_AHOM,
+ HB_SCRIPT_ANATOLIAN_HIEROGLYPHS, HB_SCRIPT_HATRAN, HB_SCRIPT_MULTANI,
+ HB_SCRIPT_OLD_HUNGARIAN, HB_SCRIPT_SIGNWRITING, HB_SCRIPT_ADLAM,
+ HB_SCRIPT_BHAIKSUKI, HB_SCRIPT_MARCHEN, HB_SCRIPT_OSAGE, HB_SCRIPT_TANGUT,
+ HB_SCRIPT_NEWA, HB_SCRIPT_MASARAM_GONDI, HB_SCRIPT_NUSHU, HB_SCRIPT_SOYOMBO,
+ HB_SCRIPT_ZANABAZAR_SQUARE, HB_SCRIPT_DOGRA, HB_SCRIPT_GUNJALA_GONDI,
+ HB_SCRIPT_HANIFI_ROHINGYA, HB_SCRIPT_MAKASAR, HB_SCRIPT_MEDEFAIDRIN,
+ HB_SCRIPT_OLD_SOGDIAN, HB_SCRIPT_SOGDIAN, HB_SCRIPT_ELYMAIC,
+ HB_SCRIPT_NANDINAGARI, HB_SCRIPT_NYIAKENG_PUACHUE_HMONG, HB_SCRIPT_WANCHO,
+
+ HB_SCRIPT_INVALID, _HB_SCRIPT_MAX_VALUE, _HB_SCRIPT_MAX_VALUE_SIGNED,
+} hb_script_t ;
+
+typedef enum {
+ HB_DIRECTION_INVALID,
+ HB_DIRECTION_LTR,
+ HB_DIRECTION_RTL,
+ HB_DIRECTION_TTB,
+ HB_DIRECTION_BTT
+} hb_direction_t ;
+
+typedef int hb_bool_t ;
+
+typedef uint32_t hb_tag_t ;
+
+typedef struct hb_feature_t {
+ hb_tag_t tag;
+ uint32_t value;
+ unsigned int start;
+ unsigned int end;
+} hb_feature_t ;
+
+typedef struct hb_font_t hb_font_t ;
+
+typedef uint32_t hb_codepoint_t ;
+typedef int32_t hb_position_t ;
+typedef uint32_t hb_mask_t ;
+
+typedef union _hb_var_int_t {
+ uint32_t u32;
+ int32_t i32;
+ uint16_t u16[2];
+ int16_t i16[2];
+ uint8_t u8[4];
+ int8_t i8[4];
+} hb_var_int_t ;
+
+typedef struct hb_glyph_info_t {
+ hb_codepoint_t codepoint ;
+ hb_mask_t mask ;
+ uint32_t cluster ;
+ /*< private >*/
+ hb_var_int_t var1 ;
+ hb_var_int_t var2 ;
+} hb_glyph_info_t ;
+
+typedef struct hb_glyph_position_t {
+ hb_position_t x_advance ;
+ hb_position_t y_advance ;
+ hb_position_t x_offset ;
+ hb_position_t y_offset ;
+ /*< private >*/
+ hb_var_int_t var ;
+} hb_glyph_position_t ;
+
+const char * hb_version_string (
+ void
+) ;
+
+hb_blob_t * hb_blob_create (
+ const char *data,
+ unsigned int length,
+ hb_memory_mode_t mode,
+ void *user_data,
+ hb_destroy_func_t destroy
+) ;
+
+void hb_blob_destroy (
+ hb_blob_t *blob
+) ;
+
+hb_face_t * hb_face_create (
+ hb_blob_t *blob,
+ unsigned int index
+) ;
+
+void hb_face_destroy (
+ hb_face_t *face
+) ;
+
+hb_language_t hb_language_from_string (
+ const char *str,
+ int len
+) ;
+
+void hb_buffer_set_language (
+ hb_buffer_t *buffer,
+ hb_language_t language
+) ;
+
+hb_script_t hb_script_from_string (
+ const char *s,
+ int len
+) ;
+
+void hb_buffer_set_script (
+ hb_buffer_t *buffer,
+ hb_script_t script
+) ;
+
+hb_direction_t hb_direction_from_string (
+ const char *str,
+ int len
+) ;
+
+void hb_buffer_set_direction (
+ hb_buffer_t *buffer,
+ hb_direction_t direction
+) ;
+
+hb_bool_t hb_feature_from_string (
+ const char *str,
+ int len,
+ hb_feature_t *feature
+) ;
+
+hb_bool_t hb_shape_full (
+ hb_font_t *font,
+ hb_buffer_t *buffer,
+ const hb_feature_t *features,
+ unsigned int num_features,
+ const char * const *shaper_list
+) ;
+
+
+hb_buffer_t * hb_buffer_create (
+ void
+) ;
+
+void hb_buffer_destroy (
+ hb_buffer_t *buffer
+) ;
+
+void hb_buffer_add_utf8 (
+ hb_buffer_t *buffer,
+ const char *text,
+ int text_length,
+ unsigned int item_offset,
+ int item_length
+) ;
+
+void hb_buffer_add_utf32 (
+ hb_buffer_t *buffer,
+ const char *text,
+ int text_length,
+ unsigned int item_offset,
+ int item_length
+) ;
+
+void hb_buffer_add (
+ hb_buffer_t *buffer,
+ hb_codepoint_t codepoint,
+ unsigned int cluster
+) ;
+
+unsigned int hb_buffer_get_length (
+ hb_buffer_t *buffer
+) ;
+
+hb_glyph_info_t * hb_buffer_get_glyph_infos (
+ hb_buffer_t *buffer,
+ unsigned int *length
+) ;
+
+hb_glyph_position_t *hb_buffer_get_glyph_positions (
+ hb_buffer_t *buffer,
+ unsigned int *length
+) ;
+
+void hb_buffer_reverse (
+ hb_buffer_t *buffer
+) ;
+
+void hb_buffer_reset (
+ hb_buffer_t *buffer
+) ;
+
+void hb_buffer_guess_segment_properties (
+ hb_buffer_t *buffer
+) ;
+
+hb_font_t * hb_font_create (
+ hb_face_t *face
+) ;
+
+void hb_font_destroy (
+ hb_font_t *font
+) ;
+
+void hb_font_set_scale (
+ hb_font_t *font,
+ int x_scale,
+ int y_scale
+) ;
+
+void hb_ot_font_set_funcs (
+ hb_font_t *font
+) ;
+
+unsigned int hb_face_get_upem (
+ hb_face_t *face
+) ;
+
+const char ** hb_shape_list_shapers (
+ void
+);
+]]
+
+-- The library must be somewhere accessible. The calls to the library are similar to
+-- the ones in the prototype but we organize things a bit differently. I tried to alias
+-- the functions in the harfbuzz namespace (luajittex will optimize this anyway but
+-- normal luatex not) but it crashes luajittex so I reverted that.
+
+do
+
+    -- Collect the names of the shapers that the library offers and report them
+    -- together with the library version. At most ten entries are inspected and
+    -- a NULL entry ends the list.
+
+    local available = harfbuzz.hb_shape_list_shapers()
+    local names     = { }
+    local count     = 0
+
+    for index=0,9 do
+        local entry = available[index]
+        if entry == ffi.NULL then
+            break
+        end
+        count = count + 1
+        names[count] = ffi.string(entry)
+    end
+
+    report("using hb library version %a, supported shapers: %,t",ffi.string(harfbuzz.hb_version_string()),names)
+
+end
+
+-- we don't want to store userdata in the public data blob
+--
+-- So the ffi objects live in file local tables instead:
+--
+-- fontdata : the font hash, indexed by font id, used to find the font's resources
+-- loaded   : font id -> hb font object
+-- shared   : filename -> hb font object, so that a font file is only loaded once
+-- featured : data.features (used as hash) -> table with the prepared hb features
+
+local fontdata = fonts.hashes.identifiers
+
+local loaded = { }
+local shared = { }
+local featured = { }
+
+-- Returns the hb font object that belongs to the given font id. The font file is
+-- looked up via the resources of the font and the resulting object is shared by
+-- all font ids that use the same file.
+--
+-- font    : font id (index into the font hash)
+-- returns : an hb_font_t cdata, scaled to the units per em of the face
+--
+-- When the file cannot be loaded this gets reported and an empty blob is used
+-- instead, so that the caller still gets a usable (glyphless) font object and
+-- not a Lua error. When the font has no filename the object is not cached in
+-- the shared table.
+
+local function loadfont(font)
+    local tfmdata   = fontdata[font]
+    local resources = tfmdata.resources
+    local filename  = resources.filename
+    local instance  = filename and shared[filename]
+    if not instance then
+        local wholefont = filename and loaddata(filename)
+        if not wholefont then
+            report("unable to load font file %a",filename)
+            wholefont = ""
+        end
+        -- Mode 0 is HB_MEMORY_MODE_DUPLICATE, so the library keeps its own copy
+        -- and the Lua string can be collected afterwards.
+        local wholeblob = ffi.gc(harfbuzz.hb_blob_create(wholefont,#wholefont,0,nil,nil),harfbuzz.hb_blob_destroy)
+        -- NOTE(review): the second argument of hb_face_create is the index of the
+        -- face inside the file; here the font id is passed, which looks wrong for
+        -- font collections -- confirm which subfont index should be used.
+        local wholeface = ffi.gc(harfbuzz.hb_face_create(wholeblob,font),harfbuzz.hb_face_destroy)
+        local scale     = harfbuzz.hb_face_get_upem(wholeface)
+        instance = ffi.gc(harfbuzz.hb_font_create(wholeface),harfbuzz.hb_font_destroy)
+        harfbuzz.hb_font_set_scale(instance,scale,scale)
+        harfbuzz.hb_ot_font_set_funcs(instance)
+        if filename then
+            shared[filename] = instance
+        end
+    end
+    return instance
+end
+
+-- Converts the feature specifications in data.featureset (a list of strings in
+-- the syntax that hb_feature_from_string accepts) into an array of hb_feature_t.
+--
+-- data    : table with an optional featureset list
+-- returns : table with
+--             noffeatures : the number of features that were parsed successfully
+--             featureblob : the ffi array itself (must stay referenced)
+--             featurespec : the first element, as passed to the shaper
+--
+-- Specifications that the library rejects are reported and skipped, so the
+-- count that is returned always matches the number of filled slots.
+
+local function loadfeatures(data)
+    local featureset  = data.featureset or { }
+    local nofentries  = #featureset
+    -- always allocate at least one slot so that feature[0] is a valid element,
+    -- also when there are no features at all
+    local feature     = ffi.new("hb_feature_t[?]",nofentries > 0 and nofentries or 1)
+    local featurespec = feature[0]
+    local noffeatures = 0
+    for i=1,nofentries do
+        local f = featureset[i]
+        -- hb_bool_t is an int: zero means that the string could not be parsed
+        if harfbuzz.hb_feature_from_string(f,#f,feature[noffeatures]) ~= 0 then
+            noffeatures = noffeatures + 1
+        else
+            report("invalid feature specification %a",f)
+        end
+    end
+    return {
+        noffeatures = noffeatures,
+        featureblob = feature,
+        featurespec = featurespec,
+    }
+end
+
+-- Turns a list of Lua strings into a C array of "const char *" that can be passed
+-- as shaper list to hb_shape_full.
+--
+-- t       : list of strings
+-- returns : ffi array with #t string pointers followed by a NULL entry
+--
+-- The shaper list has to be NULL terminated, and a variable length ffi array is
+-- only initialized with the entries given in the table, so we allocate one extra
+-- slot and set it explicitly. The array only stores pointers into the Lua
+-- strings; the finalizer closure keeps the table (and thereby the strings)
+-- referenced for as long as the array itself is alive.
+
+local function crap(t)
+    local n    = #t
+    local list = ffi.new("const char *[?]", n + 1, t)
+    list[n] = nil
+    return ffi.gc(list, function() t = nil end)
+end
+
+-- The shaper lists that can be selected by name (the shaping function looks
+-- them up with data.shaper). Each list gives the order in which the shapers
+-- of the library are to be tried.
+
+local shapers = { }
+
+shapers.native    = crap({ "ot", "uniscribe", "fallback" })
+shapers.uniscribe = crap({ "uniscribe", "ot", "fallback" })
+-- shapers.uniscribe = crap({ "uniscribe", "fallback" }) -- stalls without fallback when no uniscribe present
+shapers.fallback  = crap({ "fallback" })
+
+-- Reusing a buffer doesn't make a difference in performance so we forget
+-- about it and keep things simple. Todo: check if using locals makes sense.
+
+-- Shapes a piece of text with the hb library.
+--
+-- font     : font id; the hb font object is created on first use and then cached
+-- data     : table with the optional fields language, script (both default to
+--            "dflt"), shaper (a key in the shapers table), features (used as key
+--            for caching the prepared features) and featureset
+-- rlmode   : a negative value selects right to left shaping
+-- text     : the text, handed to packtoutf32 together with leading and trailing
+--            (presumably flags for a leading and trailing space, see the disabled
+--            code below); the result is expected to be a string with four bytes
+--            per character
+--
+-- returns  : a list with one entry per shaped glyph, each entry being
+--            { codepoint, cluster, x_offset, y_offset, x_advance, y_advance }
+--            as found in the glyph info and position records of the buffer
+
+function utilities.hb.methods.library(font,data,rlmode,text,leading,trailing)
+    local instance = loaded[font]
+    if not instance then
+        instance = loadfont(font)
+        loaded[font] = instance
+    end
+    -- todo: dflt -> DFLT ?
+    -- todo: whatever -> Whatever ?
+    local language    = data.language or "dflt"
+    local script      = data.script or "dflt"
+    local righttoleft = rlmode and rlmode < 0 or false
+    local direction   = righttoleft and "rtl" or "ltr"
+    local shaper      = shapers[data.shaper] -- nil (NULL) when the name is unknown
+    local featurehash = data.features
+    local featuredata = featurehash and featured[featurehash]
+    if not featuredata then
+        featuredata = loadfeatures(data)
+        -- a nil hash can't be used as table key, in that case we don't cache
+        if featurehash then
+            featured[featurehash] = featuredata
+        end
+    end
+
+    local buffer = ffi.gc(harfbuzz.hb_buffer_create(),harfbuzz.hb_buffer_destroy)
+
+ -- if false then
+ --     -- i have no time to look into this now but something like this should
+ --     -- be possible .. it probably doesn't make a difference in performance
+ --     local n = 0 -- here we also start at 0
+ --     if leading then
+ --         harfbuzz.hb_buffer_add(buffer,[todo: 0x20],n)
+ --     end
+ --     for i=1,#text do
+ --         n = n + 1
+ --         harfbuzz.hb_buffer_add(buffer,[todo: text[i] ],n)
+ --     end
+ --     if trailing then
+ --         n = n + 1
+ --         harfbuzz.hb_buffer_add(buffer,[todo: 0x20 ],n)
+ --     end
+ -- else
+        -- maybe also utf 8 clusters here like on the command line but i have no time
+        -- to figure that out
+        text = packtoutf32(text,leading,trailing)
+        -- Both length arguments of hb_buffer_add_utf32 count 32 bit units, not
+        -- bytes. Passing the byte length as text length made the library look
+        -- for context characters beyond the end of the string, which is why
+        -- padding with zero bytes was needed before to avoid a crash.
+        local length = #text/4
+        harfbuzz.hb_buffer_add_utf32(buffer,text,length,0,length)
+ -- end
+
+    -- maybe: hb_buffer_set_segment_properties(buffer,...)
+
+    harfbuzz.hb_buffer_set_language(buffer,harfbuzz.hb_language_from_string(language,#language))
+    harfbuzz.hb_buffer_set_script(buffer,harfbuzz.hb_script_from_string(script,#script))
+    harfbuzz.hb_buffer_set_direction(buffer,harfbuzz.hb_direction_from_string(direction,#direction))
+
+    harfbuzz.hb_buffer_guess_segment_properties(buffer) -- why is this needed (we already set them)
+    harfbuzz.hb_shape_full(instance,buffer,featuredata.featurespec,featuredata.noffeatures,shaper)
+
+    if righttoleft then
+        harfbuzz.hb_buffer_reverse(buffer)
+    end
+
+    local nofglyphs = harfbuzz.hb_buffer_get_length(buffer)
+    local infos     = harfbuzz.hb_buffer_get_glyph_infos(buffer,nil)
+    local positions = harfbuzz.hb_buffer_get_glyph_positions(buffer,nil)
+
+    local result = { }
+    for i=1,nofglyphs do
+        -- the C arrays are zero based
+        local info     = infos[i-1]
+        local position = positions[i-1]
+        result[i] = {
+            info.codepoint,
+            info.cluster,
+            position.x_offset,
+            position.y_offset,
+            position.x_advance,
+            position.y_advance,
+        }
+    end
+    -- inspect(result)
+    return result
+
+end