diff options
Diffstat (limited to 'tex/context/base/mkiv/font-phb-imp-library.lua')
-rw-r--r-- | tex/context/base/mkiv/font-phb-imp-library.lua | 498 |
1 files changed, 498 insertions, 0 deletions
diff --git a/tex/context/base/mkiv/font-phb-imp-library.lua b/tex/context/base/mkiv/font-phb-imp-library.lua new file mode 100644 index 000000000..fe991b023 --- /dev/null +++ b/tex/context/base/mkiv/font-phb-imp-library.lua @@ -0,0 +1,498 @@ +if not modules then modules = { } end modules ['font-phb-imp-library'] = { + version = 1.000, -- 2020.01.08, + comment = "companion to font-txt.mkiv", + original = "derived from a prototype by Kai Eigner", + author = "Hans Hagen", -- so don't blame KE + copyright = "TAT Zetwerk / PRAGMA ADE / ConTeXt Development Team", + license = "see context related readme files", +} + +-- The hb library comes in versions and the one I tested in 2016 was part of the inkscape +-- suite. In principle one can have incompatibilities due to updates but that is the nature +-- of a library. When a library ie expected one has better use the system version, if only +-- to make sure that different programs behave the same. +-- +-- The main reason for testing this approach was that when Idris was working on his fonts, +-- we wanted to know how different shapers deal with it and the hb command line program +-- could provide uniscribe output. For the context shaper uniscribe is the reference, also +-- because Idris started out with Volt a decade ago. +-- +-- We treat the lib as a black box as it should be. At some point Kai Eigner made an ffi +-- binding and that one was adapted to the plugin approach of context. It saved me the +-- trouble of looking at source files to figure it all out. Below is the adapted code. +-- +-- Keep in mind that this file is for mkiv only. It won't work in lmtx where instead of +-- ffi we use simple optional libraries with delayed bindings. In principle this mechanism +-- is generic but because other macropackages follow another route we don't spend time +-- on that code path here. + +local next, tonumber, pcall = next, tonumber, pcall +local reverse = table.reverse +local loaddata = io.loaddata + +local report = utilities.hb.report or print +local packtoutf32 = utilities.hb.helpers.packtoutf32 + +if not FFISUPPORTED or not ffi then + report("no ffi support") + return +elseif CONTEXTLMTXMODE and CONTEXTLMTXMODE > 0 then + report("no ffi support") + return +elseif not context then + return +end + +local harfbuzz = ffilib(os.name == "windows" and "libharfbuzz-0" or "libharfbuzz") + +if not harfbuzz then + report("no hb library found") + return +end + +-- jit.on() : on very long (hundreds of pages) it looks faster but +-- the normal font processor slows down ... this is consistent with +-- earlier observations that turning it on is often slower on these +-- one-shot tex runs (also because we don't use many math and/or +-- string helpers and therefore the faster vm of luajit gives most +-- benefits (given the patched hasher) + +-- Here is Kai's ffi mapping, a bit reorganized. We only define what we +-- need. I'm happy that Kai did the deciphering of the api that I could +-- then build upon. + +ffi.cdef [[ + +typedef struct hb_blob_t hb_blob_t ; + +typedef enum { + HB_MEMORY_MODE_DUPLICATE, + HB_MEMORY_MODE_READONLY, + HB_MEMORY_MODE_WRITABLE, + HB_MEMORY_MODE_READONLY_MAY_MAKE_WRITABLE +} hb_memory_mode_t ; + +typedef void (*hb_destroy_func_t) ( + void *user_data +) ; + +typedef struct hb_face_t hb_face_t ; + +typedef const struct hb_language_impl_t *hb_language_t ; + +typedef struct hb_buffer_t hb_buffer_t ; + +typedef enum { + HB_SCRIPT_COMMON, HB_SCRIPT_INHERITED, HB_SCRIPT_UNKNOWN, + + HB_SCRIPT_ARABIC, HB_SCRIPT_ARMENIAN, HB_SCRIPT_BENGALI, HB_SCRIPT_CYRILLIC, + HB_SCRIPT_DEVANAGARI, HB_SCRIPT_GEORGIAN, HB_SCRIPT_GREEK, + HB_SCRIPT_GUJARATI, HB_SCRIPT_GURMUKHI, HB_SCRIPT_HANGUL, HB_SCRIPT_HAN, + HB_SCRIPT_HEBREW, HB_SCRIPT_HIRAGANA, HB_SCRIPT_KANNADA, HB_SCRIPT_KATAKANA, + HB_SCRIPT_LAO, HB_SCRIPT_LATIN, HB_SCRIPT_MALAYALAM, HB_SCRIPT_ORIYA, + HB_SCRIPT_TAMIL, HB_SCRIPT_TELUGU, HB_SCRIPT_THAI, HB_SCRIPT_TIBETAN, + HB_SCRIPT_BOPOMOFO, HB_SCRIPT_BRAILLE, HB_SCRIPT_CANADIAN_SYLLABICS, + HB_SCRIPT_CHEROKEE, HB_SCRIPT_ETHIOPIC, HB_SCRIPT_KHMER, HB_SCRIPT_MONGOLIAN, + HB_SCRIPT_MYANMAR, HB_SCRIPT_OGHAM, HB_SCRIPT_RUNIC, HB_SCRIPT_SINHALA, + HB_SCRIPT_SYRIAC, HB_SCRIPT_THAANA, HB_SCRIPT_YI, HB_SCRIPT_DESERET, + HB_SCRIPT_GOTHIC, HB_SCRIPT_OLD_ITALIC, HB_SCRIPT_BUHID, HB_SCRIPT_HANUNOO, + HB_SCRIPT_TAGALOG, HB_SCRIPT_TAGBANWA, HB_SCRIPT_CYPRIOT, HB_SCRIPT_LIMBU, + HB_SCRIPT_LINEAR_B, HB_SCRIPT_OSMANYA, HB_SCRIPT_SHAVIAN, HB_SCRIPT_TAI_LE, + HB_SCRIPT_UGARITIC, HB_SCRIPT_BUGINESE, HB_SCRIPT_COPTIC, + HB_SCRIPT_GLAGOLITIC, HB_SCRIPT_KHAROSHTHI, HB_SCRIPT_NEW_TAI_LUE, + HB_SCRIPT_OLD_PERSIAN, HB_SCRIPT_SYLOTI_NAGRI, HB_SCRIPT_TIFINAGH, + HB_SCRIPT_BALINESE, HB_SCRIPT_CUNEIFORM, HB_SCRIPT_NKO, HB_SCRIPT_PHAGS_PA, + HB_SCRIPT_PHOENICIAN, HB_SCRIPT_CARIAN, HB_SCRIPT_CHAM, HB_SCRIPT_KAYAH_LI, + HB_SCRIPT_LEPCHA, HB_SCRIPT_LYCIAN, HB_SCRIPT_LYDIAN, HB_SCRIPT_OL_CHIKI, + HB_SCRIPT_REJANG, HB_SCRIPT_SAURASHTRA, HB_SCRIPT_SUNDANESE, HB_SCRIPT_VAI, + HB_SCRIPT_AVESTAN, HB_SCRIPT_BAMUM, HB_SCRIPT_EGYPTIAN_HIEROGLYPHS, + HB_SCRIPT_IMPERIAL_ARAMAIC, HB_SCRIPT_INSCRIPTIONAL_PAHLAVI, + HB_SCRIPT_INSCRIPTIONAL_PARTHIAN, HB_SCRIPT_JAVANESE, HB_SCRIPT_KAITHI, + HB_SCRIPT_LISU, HB_SCRIPT_MEETEI_MAYEK, HB_SCRIPT_OLD_SOUTH_ARABIAN, + HB_SCRIPT_OLD_TURKIC, HB_SCRIPT_SAMARITAN, HB_SCRIPT_TAI_THAM, + HB_SCRIPT_TAI_VIET, HB_SCRIPT_BATAK, HB_SCRIPT_BRAHMI, HB_SCRIPT_MANDAIC, + HB_SCRIPT_CHAKMA, HB_SCRIPT_MEROITIC_CURSIVE, HB_SCRIPT_MEROITIC_HIEROGLYPHS, + HB_SCRIPT_MIAO, HB_SCRIPT_SHARADA, HB_SCRIPT_SORA_SOMPENG, HB_SCRIPT_TAKRI, + HB_SCRIPT_BASSA_VAH, HB_SCRIPT_CAUCASIAN_ALBANIAN, HB_SCRIPT_DUPLOYAN, + HB_SCRIPT_ELBASAN, HB_SCRIPT_GRANTHA, HB_SCRIPT_KHOJKI, HB_SCRIPT_KHUDAWADI, + HB_SCRIPT_LINEAR_A, HB_SCRIPT_MAHAJANI, HB_SCRIPT_MANICHAEAN, + HB_SCRIPT_MENDE_KIKAKUI, HB_SCRIPT_MODI, HB_SCRIPT_MRO, HB_SCRIPT_NABATAEAN, + HB_SCRIPT_OLD_NORTH_ARABIAN, HB_SCRIPT_OLD_PERMIC, HB_SCRIPT_PAHAWH_HMONG, + HB_SCRIPT_PALMYRENE, HB_SCRIPT_PAU_CIN_HAU, HB_SCRIPT_PSALTER_PAHLAVI, + HB_SCRIPT_SIDDHAM, HB_SCRIPT_TIRHUTA, HB_SCRIPT_WARANG_CITI, HB_SCRIPT_AHOM, + HB_SCRIPT_ANATOLIAN_HIEROGLYPHS, HB_SCRIPT_HATRAN, HB_SCRIPT_MULTANI, + HB_SCRIPT_OLD_HUNGARIAN, HB_SCRIPT_SIGNWRITING, HB_SCRIPT_ADLAM, + HB_SCRIPT_BHAIKSUKI, HB_SCRIPT_MARCHEN, HB_SCRIPT_OSAGE, HB_SCRIPT_TANGUT, + HB_SCRIPT_NEWA, HB_SCRIPT_MASARAM_GONDI, HB_SCRIPT_NUSHU, HB_SCRIPT_SOYOMBO, + HB_SCRIPT_ZANABAZAR_SQUARE, HB_SCRIPT_DOGRA, HB_SCRIPT_GUNJALA_GONDI, + HB_SCRIPT_HANIFI_ROHINGYA, HB_SCRIPT_MAKASAR, HB_SCRIPT_MEDEFAIDRIN, + HB_SCRIPT_OLD_SOGDIAN, HB_SCRIPT_SOGDIAN, HB_SCRIPT_ELYMAIC, + HB_SCRIPT_NANDINAGARI, HB_SCRIPT_NYIAKENG_PUACHUE_HMONG, HB_SCRIPT_WANCHO, + + HB_SCRIPT_INVALID, _HB_SCRIPT_MAX_VALUE, _HB_SCRIPT_MAX_VALUE_SIGNED, +} hb_script_t ; + +typedef enum { + HB_DIRECTION_INVALID, + HB_DIRECTION_LTR, + HB_DIRECTION_RTL, + HB_DIRECTION_TTB, + HB_DIRECTION_BTT +} hb_direction_t ; + +typedef int hb_bool_t ; + +typedef uint32_t hb_tag_t ; + +typedef struct hb_feature_t { + hb_tag_t tag; + uint32_t value; + unsigned int start; + unsigned int end; +} hb_feature_t ; + +typedef struct hb_font_t hb_font_t ; + +typedef uint32_t hb_codepoint_t ; +typedef int32_t hb_position_t ; +typedef uint32_t hb_mask_t ; + +typedef union _hb_var_int_t { + uint32_t u32; + int32_t i32; + uint16_t u16[2]; + int16_t i16[2]; + uint8_t u8[4]; + int8_t i8[4]; +} hb_var_int_t ; + +typedef struct hb_glyph_info_t { + hb_codepoint_t codepoint ; + hb_mask_t mask ; + uint32_t cluster ; + /*< private >*/ + hb_var_int_t var1 ; + hb_var_int_t var2 ; +} hb_glyph_info_t ; + +typedef struct hb_glyph_position_t { + hb_position_t x_advance ; + hb_position_t y_advance ; + hb_position_t x_offset ; + hb_position_t y_offset ; + /*< private >*/ + hb_var_int_t var ; +} hb_glyph_position_t ; + +const char * hb_version_string ( + void +) ; + +hb_blob_t * hb_blob_create ( + const char *data, + unsigned int length, + hb_memory_mode_t mode, + void *user_data, + hb_destroy_func_t destroy +) ; + +void hb_blob_destroy ( + hb_blob_t *blob +) ; + +hb_face_t * hb_face_create ( + hb_blob_t *blob, + unsigned int index +) ; + +void hb_face_destroy ( + hb_face_t *face +) ; + +hb_language_t hb_language_from_string ( + const char *str, + int len +) ; + +void hb_buffer_set_language ( + hb_buffer_t *buffer, + hb_language_t language +) ; + +hb_script_t hb_script_from_string ( + const char *s, + int len +) ; + +void hb_buffer_set_script ( + hb_buffer_t *buffer, + hb_script_t script +) ; + +hb_direction_t hb_direction_from_string ( + const char *str, + int len +) ; + +void hb_buffer_set_direction ( + hb_buffer_t *buffer, + hb_direction_t direction +) ; + +hb_bool_t hb_feature_from_string ( + const char *str, + int len, + hb_feature_t *feature +) ; + +hb_bool_t hb_shape_full ( + hb_font_t *font, + hb_buffer_t *buffer, + const hb_feature_t *features, + unsigned int num_features, + const char * const *shaper_list +) ; + + +hb_buffer_t * hb_buffer_create ( + void +) ; + +void hb_buffer_destroy ( + hb_buffer_t *buffer +) ; + +void hb_buffer_add_utf8 ( + hb_buffer_t *buffer, + const char *text, + int text_length, + unsigned int item_offset, + int item_length +) ; + +void hb_buffer_add_utf32 ( + hb_buffer_t *buffer, + const char *text, + int text_length, + unsigned int item_offset, + int item_length +) ; + +void hb_buffer_add ( + hb_buffer_t *buffer, + hb_codepoint_t codepoint, + unsigned int cluster +) ; + +unsigned int hb_buffer_get_length ( + hb_buffer_t *buffer +) ; + +hb_glyph_info_t * hb_buffer_get_glyph_infos ( + hb_buffer_t *buffer, + unsigned int *length +) ; + +hb_glyph_position_t *hb_buffer_get_glyph_positions ( + hb_buffer_t *buffer, + unsigned int *length +) ; + +void hb_buffer_reverse ( + hb_buffer_t *buffer +) ; + +void hb_buffer_reset ( + hb_buffer_t *buffer +) ; + +void hb_buffer_guess_segment_properties ( + hb_buffer_t *buffer +) ; + +hb_font_t * hb_font_create ( + hb_face_t *face +) ; + +void hb_font_destroy ( + hb_font_t *font +) ; + +void hb_font_set_scale ( + hb_font_t *font, + int x_scale, + int y_scale +) ; + +void hb_ot_font_set_funcs ( + hb_font_t *font +) ; + +unsigned int hb_face_get_upem ( + hb_face_t *face +) ; + +const char ** hb_shape_list_shapers ( + void +); +]] + +-- The library must be somewhere accessible. The calls to the library are similar to +-- the ones in the prototype but we organize things a bit differently. I tried to alias +-- the functions in the harfbuzz namespace (luajittex will optimize this anyway but +-- normal luatex not) but it crashes luajittex so I revered that. + +do + + local l = harfbuzz.hb_shape_list_shapers() + local s = { } + + for i=0,9 do + local str = l[i] + if str == ffi.NULL then + break + else + s[#s+1] = ffi.string(str) + end + end + + report("using hb library version %a, supported shapers: %,t",ffi.string(harfbuzz.hb_version_string()),s) + +end + +-- we don't want to store userdata in the public data blob + +local fontdata = fonts.hashes.identifiers + +local loaded = { } +local shared = { } +local featured = { } + +local function loadfont(font) + local tfmdata = fontdata[font] + local resources = tfmdata.resources + local filename = resources.filename + local instance = shared[filename] + if not instance then + local wholefont = io.loaddata(filename) + local wholeblob = ffi.gc(harfbuzz.hb_blob_create(wholefont,#wholefont,0,nil,nil),harfbuzz.hb_blob_destroy) + local wholeface = ffi.gc(harfbuzz.hb_face_create(wholeblob,font),harfbuzz.hb_face_destroy) + local scale = harfbuzz.hb_face_get_upem(wholeface) + instance = ffi.gc(harfbuzz.hb_font_create(wholeface),harfbuzz.hb_font_destroy) + harfbuzz.hb_font_set_scale(instance,scale,scale) + harfbuzz.hb_ot_font_set_funcs(instance) + shared[filename] = instance + end + return instance +end + +local function loadfeatures(data) + local featureset = data.featureset or { } + local feature = ffi.new("hb_feature_t[?]",#featureset) + local featurespec = feature[0] + local noffeatures = 0 + for i=1,#featureset do + local f = featureset[i] + harfbuzz.hb_feature_from_string(f,#f,feature[noffeatures]) + noffeatures = noffeatures + 1 + end + return { + noffeatures = #featureset, + featureblob = feature, + featurespec = featurespec, + } +end + +local function crap(t) + return ffi.new("const char *[?]", #t, t) +end + +local shapers = { + native = crap { "ot", "uniscribe", "fallback" }, + uniscribe = crap { "uniscribe", "ot", "fallback" }, + -- uniscribe = crap { "uniscribe", "fallback" }, -- stalls without fallback when no uniscribe present + fallback = crap { "fallback" }, +} + +-- Reusing a buffer doesn't make a difference in performance so we forget +-- about it and keep things simple. Todo: check if using locals makes sense. + +function utilities.hb.methods.library(font,data,rlmode,text,leading,trailing) + local instance = loaded[font] + if not instance then + instance = loadfont(font) + loaded[font] = instance + end + -- todo: dflt -> DFLT ? + -- todo: whatever -> Whatever ? + local language = data.language or "dflt" + local script = data.script or "dflt" + local direction = rlmode < 0 and "rtl" or "ltr" + local shaper = shapers[data.shaper] + local featurehash = data.features + local featuredata = featured[featurehash] + if not featuredata then + featuredata = loadfeatures(data) + featured[featurehash] = featuredata + end + + local buffer = ffi.gc(harfbuzz.hb_buffer_create(),harfbuzz.hb_buffer_destroy) + + -- if false then + -- -- i have no time to look into this now but something like this should + -- -- be possible .. it probably doesn't make a difference in performance + -- local n = 0 -- here we also start at 0 + -- if leading then + -- harfbuzz.hb_buffer_add(buffer,[todo: 0x20],n) + -- end + -- for i=1,#text do + -- n = n + 1 + -- harfbuzz.hb_buffer_add(buffer,[todo: text[i] ],n) + -- end + -- if trailing then + -- n = n + 1 + -- harfbuzz.hb_buffer_add(buffer,[todo: 0x20 ],n) + -- end + -- else + -- maybe also utf 8 clusters here like on the command line but i have no time + -- to figure that out + text = packtoutf32(text,leading,trailing) + local size = #text/4 + text = text .. "\000\000\000\000\000\000\000\000" -- trial and error: avoid crash + harfbuzz.hb_buffer_add_utf32(buffer,text,#text,0,size) + -- end + + -- maybe: hb_buffer_set_segment_properties(buffer,...) + + harfbuzz.hb_buffer_set_language(buffer,harfbuzz.hb_language_from_string(language,#language)) + harfbuzz.hb_buffer_set_script(buffer,harfbuzz.hb_script_from_string(script,#script)) + harfbuzz.hb_buffer_set_direction(buffer,harfbuzz.hb_direction_from_string(direction,#direction)) + + harfbuzz.hb_buffer_guess_segment_properties(buffer) -- why is this needed (we already set them) + harfbuzz.hb_shape_full(instance,buffer,featuredata.featurespec,featuredata.noffeatures,shaper) + + if rlmode < 0 then + harfbuzz.hb_buffer_reverse(buffer) + end + + local size = harfbuzz.hb_buffer_get_length(buffer) + local infos = harfbuzz.hb_buffer_get_glyph_infos(buffer, nil) + local positions = harfbuzz.hb_buffer_get_glyph_positions(buffer, nil) + + local result = { } + for i=1,size do + local info = infos[i-1] + local position = positions[i-1] + result[i] = { + info.codepoint, + info.cluster, + position.x_offset, + position.y_offset, + position.x_advance, + position.y_advance, + } + end + -- inspect(result) + return result + +end |