summaryrefslogtreecommitdiff
path: root/source/luametatex/source/tex/textextcodes.c
diff options
context:
space:
mode:
Diffstat (limited to 'source/luametatex/source/tex/textextcodes.c')
-rw-r--r--source/luametatex/source/tex/textextcodes.c607
1 files changed, 607 insertions, 0 deletions
diff --git a/source/luametatex/source/tex/textextcodes.c b/source/luametatex/source/tex/textextcodes.c
new file mode 100644
index 000000000..39fc258c7
--- /dev/null
+++ b/source/luametatex/source/tex/textextcodes.c
@@ -0,0 +1,607 @@
+/*
+ See license.txt in the root of this project.
+*/
+
+# include "luametatex.h"
+
+/*tex
+
+ Contrary to traditional \TEX\ we have catcode tables so that we can switch catcode regimes very
+ fast. We can have many such regimes and they're stored in trees.
+
+*/
+
+/*tex
+    Constants for the catcode trees. |CATCODESTACK| is what gets passed as first argument to
+    |sa_new_tree| and |CATCODEDEFAULTS| seeds the default item: it is the byte |0x0C| (12, the
+    value of |CATCODEDEFAULT|) repeated four times. In the code shown here |CATCODEDEFAULT|
+    itself only appears in the commented-out default accessors.
+*/
+# define CATCODESTACK 8
+# define CATCODEDEFAULT 12
+# define CATCODEDEFAULTS 0x0C0C0C0C /*tex Used as |dflt| value in |sa| struct. */
+
+/*tex
+    Bookkeeping for all catcode tables. Both arrays are allocated with |max_n_of_catcode_tables|
+    entries and are indexed by table number.
+*/
+typedef struct catcode_state_info {
+ sa_tree *catcode_heads; /*tex One tree per table; an entry stays |NULL| until the table is first touched. */
+ unsigned char *catcode_valid; /*tex Set to 1 when a table is initialized, copied into or undumped. */
+ int catcode_max; /*tex Highest table index used so far; the upper bound of the loops over tables. */
+ int padding; /*tex Not used by the code in this file. */
+} catcode_state_info;
+
+/*tex The single, file-local catcode state; the arrays get allocated in |tex_aux_allocate_catcodes|. */
+static catcode_state_info lmt_catcode_state = {
+ .catcode_heads = NULL,
+ .catcode_valid = NULL,
+ .catcode_max = 0,
+ .padding = 0,
+} ;
+
+/*tex
+    Allocates the two per-table arrays (tree heads and valid flags), each with room for
+    |max_n_of_catcode_tables| entries, and wipes them. When either allocation fails an overflow
+    error is reported instead.
+*/
+static void tex_aux_allocate_catcodes(void)
+{
+    sa_tree *heads = sa_malloc_array(sizeof(sa_tree), max_n_of_catcode_tables);
+    unsigned char *valid = sa_malloc_array(sizeof(unsigned char), max_n_of_catcode_tables);
+    /*tex The state is updated before the check, so a failed attempt remains visible there. */
+    lmt_catcode_state.catcode_heads = heads;
+    lmt_catcode_state.catcode_valid = valid;
+    if (! heads || ! valid) {
+        tex_overflow_error("catcodes", max_n_of_catcode_tables);
+        return;
+    }
+    sa_wipe_array(heads, sizeof(sa_tree), max_n_of_catcode_tables);
+    sa_wipe_array(valid, sizeof(unsigned char), max_n_of_catcode_tables);
+}
+
+/*tex
+    Sets up a fresh catcode state: the arrays are (re)allocated and table zero gets an empty tree
+    with the default item and is flagged as valid.
+*/
+static void tex_aux_initialize_catcodes(void)
+{
+    sa_tree_item dflt = { .uint_value = CATCODEDEFAULTS };
+    lmt_catcode_state.catcode_max = 0;
+    tex_aux_allocate_catcodes();
+    lmt_catcode_state.catcode_heads[0] = sa_new_tree(CATCODESTACK, 1, dflt);
+    lmt_catcode_state.catcode_valid[0] = 1;
+}
+
+/*tex
+    Stores value |v| for character |n| in catcode table |h|. The tree of the table is created on
+    demand and |catcode_max| is raised when |h| exceeds it. The |gl| argument is handed over to
+    |sa_set_item_1| as is (presumably the grouping level). There is no range check on |h| here.
+*/
+void tex_set_cat_code(int h, int n, halfword v, int gl)
+{
+    sa_tree *slot = &lmt_catcode_state.catcode_heads[h];
+    if (lmt_catcode_state.catcode_max < h) {
+        lmt_catcode_state.catcode_max = h;
+    }
+    if (*slot == NULL) {
+        sa_tree_item dflt = { .uint_value = CATCODEDEFAULTS };
+        *slot = sa_new_tree(CATCODESTACK, 1, dflt);
+    }
+    sa_set_item_1(*slot, n, v, gl);
+}
+
+/*tex
+    Returns what is stored for character |n| in catcode table |h|. Mind that this getter has side
+    effects: a missing tree is created on the spot and |catcode_max| is raised when needed. There
+    is no range check on |h| here.
+*/
+halfword tex_get_cat_code(int h, int n)
+{
+    sa_tree tree = lmt_catcode_state.catcode_heads[h];
+    if (lmt_catcode_state.catcode_max < h) {
+        lmt_catcode_state.catcode_max = h;
+    }
+    if (tree == NULL) {
+        sa_tree_item dflt = { .uint_value = CATCODEDEFAULTS };
+        tree = sa_new_tree(CATCODESTACK, 1, dflt);
+        lmt_catcode_state.catcode_heads[h] = tree;
+    }
+    return sa_return_item_1(tree, n);
+}
+
+/*tex
+    Runs |sa_restore_stack| with |gl| over every catcode table that has a tree, from table zero up
+    to |catcode_max|. The |h| argument is only used to raise |catcode_max| first.
+*/
+void tex_unsave_cat_codes(int h, int gl)
+{
+    sa_tree *heads = lmt_catcode_state.catcode_heads;
+    if (lmt_catcode_state.catcode_max < h) {
+        lmt_catcode_state.catcode_max = h;
+    }
+    for (int k = 0; k <= lmt_catcode_state.catcode_max; k++) {
+        sa_tree tree = heads[k];
+        if (! tree) {
+            continue;
+        }
+        sa_restore_stack(tree, gl);
+    }
+}
+
+/*tex
+    Writes the catcode tables to |f|: first |catcode_max|, then the number of valid tables, then
+    for each valid table its index followed by its tree. Only tables flagged in |catcode_valid|
+    are written.
+*/
+static void tex_aux_dump_catcodes(dumpstream f)
+{
+    const unsigned char *valid = lmt_catcode_state.catcode_valid;
+    int total = 0;
+    /*tex A first pass counts, because the count precedes the entries in the stream. */
+    for (int k = 0; k <= lmt_catcode_state.catcode_max; k++) {
+        total += valid[k] ? 1 : 0;
+    }
+    dump_int(f, lmt_catcode_state.catcode_max);
+    dump_int(f, total);
+    for (int k = 0; k <= lmt_catcode_state.catcode_max; k++) {
+        if (! valid[k]) {
+            continue;
+        }
+        dump_int(f, k);
+        sa_dump_tree(f, lmt_catcode_state.catcode_heads[k]);
+    }
+}
+
+/*tex
+    Reads the catcode tables back from |f|, in the layout written by |tex_aux_dump_catcodes|:
+    |catcode_max|, the number of entries, and per entry a table index plus a tree.
+
+    Trees that are still held by the current arrays are destroyed before the arrays are freed;
+    otherwise they would leak. The values that come from the stream are not trusted: the maximum
+    is clamped to the array size, and an entry with an out of range index is still read (to keep
+    the stream in sync) but dropped instead of being stored outside the arrays.
+*/
+static void tex_aux_undump_catcodes(dumpstream f)
+{
+    int total = 0;
+    int highest = 0;
+    if (lmt_catcode_state.catcode_heads) {
+        for (int k = 0; k <= lmt_catcode_state.catcode_max; k++) {
+            /*tex Unused entries are |NULL|; this file passes those to |sa_destroy_tree| elsewhere too. */
+            sa_destroy_tree(lmt_catcode_state.catcode_heads[k]);
+        }
+    }
+    sa_free_array(lmt_catcode_state.catcode_heads);
+    sa_free_array(lmt_catcode_state.catcode_valid);
+    tex_aux_allocate_catcodes();
+    undump_int(f, highest);
+    undump_int(f, total);
+    if (highest < 0) {
+        highest = 0;
+    } else if (highest >= max_n_of_catcode_tables) {
+        highest = max_n_of_catcode_tables - 1;
+    }
+    lmt_catcode_state.catcode_max = highest;
+    for (int k = 0; k < total; k++) {
+        int x = -1;
+        sa_tree tree;
+        undump_int(f, x);
+        tree = sa_undump_tree(f);
+        if (x >= 0 && x < max_n_of_catcode_tables) {
+            /*tex A duplicate index replaces the earlier tree rather than leaking it. */
+            sa_destroy_tree(lmt_catcode_state.catcode_heads[x]);
+            lmt_catcode_state.catcode_heads[x] = tree;
+            lmt_catcode_state.catcode_valid[x] = 1;
+        } else {
+            sa_destroy_tree(tree);
+        }
+    }
+}
+
+/*tex Returns 1 when |h| is within range and the table is flagged as valid, and 0 otherwise. */
+int tex_valid_catcode_table(int h)
+{
+    if (h < 0 || h >= max_n_of_catcode_tables) {
+        return 0;
+    }
+    return lmt_catcode_state.catcode_valid[h] != 0;
+}
+
+/*tex
+    Makes table |to| a copy of table |from| and flags it as valid. An invalid source still ends
+    the program, as before; an out of range target is now treated the same way instead of
+    writing outside the arrays.
+
+    The copy is made before the old target tree is destroyed, and copying a table onto itself is
+    a no-op. Destroying first would release the source tree when |from| equals |to| and then copy
+    freed memory.
+*/
+void tex_copy_cat_codes(int from, int to)
+{
+    if (from < 0 || from >= max_n_of_catcode_tables || lmt_catcode_state.catcode_valid[from] == 0
+     || to < 0 || to >= max_n_of_catcode_tables) {
+        exit(EXIT_FAILURE);
+    } else {
+        if (to > lmt_catcode_state.catcode_max) {
+            lmt_catcode_state.catcode_max = to;
+        }
+        if (from != to) {
+            sa_tree copy = sa_copy_tree(lmt_catcode_state.catcode_heads[from]);
+            sa_destroy_tree(lmt_catcode_state.catcode_heads[to]);
+            lmt_catcode_state.catcode_heads[to] = copy;
+            lmt_catcode_state.catcode_valid[to] = 1;
+        }
+    }
+}
+
+/*
+void set_cat_code_table_default(int h, int dflt)
+{
+ if (valid_catcode_table(h)) {
+ catcode_state.catcode_heads[h]->dflt.uchar_value[0] = (unsigned char) dflt;
+ catcode_state.catcode_heads[h]->dflt.uchar_value[1] = (unsigned char) dflt;
+ catcode_state.catcode_heads[h]->dflt.uchar_value[2] = (unsigned char) dflt;
+ catcode_state.catcode_heads[h]->dflt.uchar_value[3] = (unsigned char) dflt;
+ }
+}
+
+int get_cat_code_table_default(int h)
+{
+ if (valid_catcode_table(h)) {
+ return catcode_state.catcode_heads[h]->dflt.uchar_value[0];
+ } else {
+ return CATCODEDEFAULT;
+ }
+}
+*/
+
+/*tex
+    Resets table |h| to the built-in regime: the old tree is destroyed, a handful of special
+    characters and the letters |A-Z| and |a-z| get their category, and the table is flagged as
+    valid. The new tree comes into existence with the first |tex_set_cat_code| call. All values
+    are set with 1 as last argument.
+*/
+void tex_initialize_cat_codes(int h)
+{
+    if (lmt_catcode_state.catcode_max < h) {
+        lmt_catcode_state.catcode_max = h;
+    }
+    sa_destroy_tree(lmt_catcode_state.catcode_heads[h]);
+    lmt_catcode_state.catcode_heads[h] = NULL;
+    /*tex The specials. */
+    tex_set_cat_code(h, '\r', end_line_cmd, 1);
+    tex_set_cat_code(h, ' ', spacer_cmd, 1);
+    tex_set_cat_code(h, '\\', escape_cmd, 1);
+    tex_set_cat_code(h, '%', comment_cmd, 1);
+    tex_set_cat_code(h, 127, invalid_char_cmd, 1);
+    tex_set_cat_code(h, 0, ignore_cmd, 1);
+    tex_set_cat_code(h, 0xFEFF, ignore_cmd, 1);
+    /*tex The letters, uppercase and lowercase in lockstep. */
+    for (int upper = 'A', lower = 'a'; upper <= 'Z'; upper++, lower++) {
+        tex_set_cat_code(h, upper, letter_cmd, 1);
+        tex_set_cat_code(h, lower, letter_cmd, 1);
+    }
+    lmt_catcode_state.catcode_valid[h] = 1;
+}
+
+/*tex
+    Releases all catcode tables and the two arrays that hold them.
+
+    Every tree up to |catcode_max| is destroyed, not only those flagged as valid: trees that were
+    created on demand by |tex_set_cat_code| or |tex_get_cat_code| never get that flag and would
+    leak otherwise. Unused entries are |NULL|, which |sa_destroy_tree| is handed elsewhere in this
+    file too. Afterwards the state is back at its initial values.
+*/
+static void tex_aux_free_catcodes(void)
+{
+    if (lmt_catcode_state.catcode_heads) {
+        for (int k = 0; k <= lmt_catcode_state.catcode_max; k++) {
+            sa_destroy_tree(lmt_catcode_state.catcode_heads[k]);
+        }
+    }
+    lmt_catcode_state.catcode_heads = sa_free_array(lmt_catcode_state.catcode_heads);
+    lmt_catcode_state.catcode_valid = sa_free_array(lmt_catcode_state.catcode_valid);
+    lmt_catcode_state.catcode_max = 0;
+}
+
+/*tex
+
+ The lowercase mapping codes are also stored in a tree. Let's keep them close for cache hits,
+ maybe also with hjcodes.
+
+*/
+
+/*tex
+    Per code type: the value passed as first argument to |sa_new_tree| and the default item value.
+    Only the space factor has a non-zero default (1000).
+*/
+# define LCCODESTACK 8
+# define LCCODEDEFAULT 0
+
+# define UCCODESTACK 8
+# define UCCODEDEFAULT 0
+
+# define SFCODESTACK 8
+# define SFCODEDEFAULT 1000
+
+# define HCCODESTACK 8
+# define HCCODEDEFAULT 0
+
+# define HMCODESTACK 8
+# define HMCODEDEFAULT 0
+
+/*tex The heads of the five global character code trees; there is one tree per code type. */
+typedef struct luscode_state_info {
+ sa_tree uccode_head; /*tex uppercase codes */
+ sa_tree lccode_head; /*tex lowercase codes, also the fallback in |tex_get_hj_code| */
+ sa_tree sfcode_head; /*tex space factor codes */
+ sa_tree hccode_head; /*tex hyphen character codes */
+ sa_tree hmcode_head; /*tex math hyphenation codes, created with item size argument 1 instead of 4 */
+} luscode_state_info;
+
+/*tex The file-local state; all heads are |NULL| until the initializers or undumpers have run. */
+static luscode_state_info lmt_luscode_state = {
+ .uccode_head = NULL,
+ .lccode_head = NULL,
+ .sfcode_head = NULL,
+ .hccode_head = NULL,
+ .hmcode_head = NULL
+};
+
+/*tex Stores |v| as lowercase code of character |n|; |gl| is passed on to |sa_set_item_4| as is. */
+void tex_set_lc_code(int n, halfword v, int gl)
+{
+    sa_tree_item item = { .int_value = v };
+    sa_set_item_4(lmt_luscode_state.lccode_head, n, item, gl);
+}
+
+/*tex Returns the lowercase code stored for character |n|. */
+halfword tex_get_lc_code(int n)
+{
+    sa_tree tree = lmt_luscode_state.lccode_head;
+    return sa_return_item_4(tree, n);
+}
+
+/*tex Hands the lowercase tree to |sa_restore_stack| together with |gl|. */
+static void tex_aux_unsave_lccodes(int gl)
+{
+    sa_tree tree = lmt_luscode_state.lccode_head;
+    sa_restore_stack(tree, gl);
+}
+
+/*tex Creates the lowercase tree with |LCCODEDEFAULT| as default item. */
+static void tex_aux_initialize_lccodes(void)
+{
+    sa_tree_item dflt = { .int_value = LCCODEDEFAULT };
+    lmt_luscode_state.lccode_head = sa_new_tree(LCCODESTACK, 4, dflt);
+}
+
+/*tex Writes the lowercase tree to |f|. */
+static void tex_aux_dump_lccodes(dumpstream f)
+{
+    sa_tree tree = lmt_luscode_state.lccode_head;
+    sa_dump_tree(f, tree);
+}
+
+/*tex Reads a tree from |f| and makes it the lowercase tree; the old head is simply overwritten. */
+static void tex_aux_undump_lccodes(dumpstream f)
+{
+    sa_tree tree = sa_undump_tree(f);
+    lmt_luscode_state.lccode_head = tree;
+}
+
+/*tex
+    Destroys the lowercase tree. The head is reset afterwards so that no dangling pointer is left
+    behind, in line with what |tex_aux_free_catcodes| does with its arrays.
+*/
+static void tex_aux_free_lccodes(void)
+{
+    sa_destroy_tree(lmt_luscode_state.lccode_head);
+    lmt_luscode_state.lccode_head = NULL;
+}
+
+/*tex
+
+ And the uppercase mapping codes are again stored in a tree.
+
+*/
+
+/*tex Stores |v| as uppercase code of character |n|; |gl| is passed on to |sa_set_item_4| as is. */
+void tex_set_uc_code(int n, halfword v, int gl)
+{
+    sa_tree_item item = { .int_value = v };
+    sa_set_item_4(lmt_luscode_state.uccode_head, n, item, gl);
+}
+
+/*tex Returns the uppercase code stored for character |n|. */
+halfword tex_get_uc_code(int n)
+{
+    sa_tree tree = lmt_luscode_state.uccode_head;
+    return sa_return_item_4(tree, n);
+}
+
+/*tex Hands the uppercase tree to |sa_restore_stack| together with |gl|. */
+static void tex_aux_unsave_uccodes(int gl)
+{
+    sa_tree tree = lmt_luscode_state.uccode_head;
+    sa_restore_stack(tree, gl);
+}
+
+/*tex Creates the uppercase tree with |UCCODEDEFAULT| as default item. */
+static void tex_aux_initialize_uccodes(void)
+{
+    const sa_tree_item dflt = { .int_value = UCCODEDEFAULT };
+    sa_tree tree = sa_new_tree(UCCODESTACK, 4, dflt);
+    lmt_luscode_state.uccode_head = tree;
+}
+
+/*tex Writes the uppercase tree to |f|. */
+static void tex_aux_dump_uccodes(dumpstream f)
+{
+    sa_tree tree = lmt_luscode_state.uccode_head;
+    sa_dump_tree(f, tree);
+}
+
+/*tex Reads a tree from |f| and makes it the uppercase tree; the old head is simply overwritten. */
+static void tex_aux_undump_uccodes(dumpstream f)
+{
+    sa_tree tree = sa_undump_tree(f);
+    lmt_luscode_state.uccode_head = tree;
+}
+
+/*tex
+    Destroys the uppercase tree. The head is reset afterwards so that no dangling pointer is left
+    behind, in line with what |tex_aux_free_catcodes| does with its arrays.
+*/
+static void tex_aux_free_uccodes(void)
+{
+    sa_destroy_tree(lmt_luscode_state.uccode_head);
+    lmt_luscode_state.uccode_head = NULL;
+}
+
+/*tex
+
+ By now it will be no surprise that the space factors get stored in a tree.
+
+*/
+
+/*tex Stores |v| as space factor of character |n|; |gl| is passed on to |sa_set_item_4| as is. */
+void tex_set_sf_code(int n, halfword v, int gl)
+{
+    sa_tree_item item = { .int_value = v };
+    sa_set_item_4(lmt_luscode_state.sfcode_head, n, item, gl);
+}
+
+/*tex Returns the space factor stored for character |n|. */
+halfword tex_get_sf_code(int n)
+{
+    sa_tree tree = lmt_luscode_state.sfcode_head;
+    return sa_return_item_4(tree, n);
+}
+
+/*tex Hands the space factor tree to |sa_restore_stack| together with |gl|. */
+static void tex_aux_unsave_sfcodes(int gl)
+{
+    sa_tree tree = lmt_luscode_state.sfcode_head;
+    sa_restore_stack(tree, gl);
+}
+
+/*tex Creates the space factor tree with |SFCODEDEFAULT| (1000) as default item. */
+static void tex_aux_initialize_sfcodes(void)
+{
+    const sa_tree_item dflt = { .int_value = SFCODEDEFAULT };
+    sa_tree tree = sa_new_tree(SFCODESTACK, 4, dflt);
+    lmt_luscode_state.sfcode_head = tree;
+}
+
+/*tex Writes the space factor tree to |f|. */
+static void tex_aux_dump_sfcodes(dumpstream f)
+{
+    sa_tree tree = lmt_luscode_state.sfcode_head;
+    sa_dump_tree(f, tree);
+}
+
+/*tex Reads a tree from |f| and makes it the space factor tree; the old head is simply overwritten. */
+static void tex_aux_undump_sfcodes(dumpstream f)
+{
+    sa_tree tree = sa_undump_tree(f);
+    lmt_luscode_state.sfcode_head = tree;
+}
+
+/*tex
+    Destroys the space factor tree. The head is reset afterwards so that no dangling pointer is
+    left behind, in line with what |tex_aux_free_catcodes| does with its arrays.
+*/
+static void tex_aux_free_sfcodes(void)
+{
+    sa_destroy_tree(lmt_luscode_state.sfcode_head);
+    lmt_luscode_state.sfcode_head = NULL;
+}
+
+/*tex
+
+ Finally the hyphen character codes, a rather small sparse array.
+
+*/
+
+/*tex Stores |v| as hyphen character code of |n|; |gl| is passed on to |sa_set_item_4| as is. */
+void tex_set_hc_code(int n, halfword v, int gl)
+{
+    sa_tree_item item = { .int_value = v };
+    sa_set_item_4(lmt_luscode_state.hccode_head, n, item, gl);
+}
+
+/*tex Returns the hyphen character code stored for character |n|. */
+halfword tex_get_hc_code(int n)
+{
+    sa_tree tree = lmt_luscode_state.hccode_head;
+    return sa_return_item_4(tree, n);
+}
+
+/*tex Hands the hyphen character tree to |sa_restore_stack| together with |gl|. */
+static void tex_aux_unsave_hccodes(int gl)
+{
+    sa_tree tree = lmt_luscode_state.hccode_head;
+    sa_restore_stack(tree, gl);
+}
+
+/*tex Creates the hyphen character tree with |HCCODEDEFAULT| as default item. */
+static void tex_aux_initialize_hccodes(void)
+{
+    const sa_tree_item dflt = { .int_value = HCCODEDEFAULT };
+    sa_tree tree = sa_new_tree(HCCODESTACK, 4, dflt);
+    lmt_luscode_state.hccode_head = tree;
+}
+
+/*tex Writes the hyphen character tree to |f|. */
+static void tex_aux_dump_hccodes(dumpstream f)
+{
+    sa_tree tree = lmt_luscode_state.hccode_head;
+    sa_dump_tree(f, tree);
+}
+
+/*tex Reads a tree from |f| and makes it the hyphen character tree; the old head is simply overwritten. */
+static void tex_aux_undump_hccodes(dumpstream f)
+{
+    sa_tree tree = sa_undump_tree(f);
+    lmt_luscode_state.hccode_head = tree;
+}
+
+/*tex
+    Destroys the hyphen character tree. The head is reset afterwards so that no dangling pointer
+    is left behind, in line with what |tex_aux_free_catcodes| does with its arrays.
+*/
+static void tex_aux_free_hccodes(void)
+{
+    sa_destroy_tree(lmt_luscode_state.hccode_head);
+    lmt_luscode_state.hccode_head = NULL;
+}
+
+/*tex
+ The same is true for math hyphenation but here we have a small options set.
+*/
+
+/*tex
+    Stores |v| as math hyphenation code of character |n|. This tree uses the |_1| accessors, so
+    the value goes in directly instead of via an item; |gl| is passed on as is.
+*/
+void tex_set_hm_code(int n, halfword v, int gl)
+{
+    sa_tree tree = lmt_luscode_state.hmcode_head;
+    sa_set_item_1(tree, n, v, gl);
+}
+
+/*tex Returns the math hyphenation code stored for character |n|. */
+halfword tex_get_hm_code(int n)
+{
+    sa_tree tree = lmt_luscode_state.hmcode_head;
+    return sa_return_item_1(tree, n);
+}
+
+/*tex Hands the math hyphenation tree to |sa_restore_stack| together with |gl|. */
+static void tex_aux_unsave_hmcodes(int gl)
+{
+    sa_tree tree = lmt_luscode_state.hmcode_head;
+    sa_restore_stack(tree, gl);
+}
+
+/*tex
+    Creates the math hyphenation tree with |HMCODEDEFAULT| as default item. Contrary to the other
+    code trees the second argument to |sa_new_tree| is 1 here and not 4.
+*/
+static void tex_aux_initialize_hmcodes(void)
+{
+    const sa_tree_item dflt = { .int_value = HMCODEDEFAULT };
+    sa_tree tree = sa_new_tree(HMCODESTACK, 1, dflt);
+    lmt_luscode_state.hmcode_head = tree;
+}
+
+/*tex Writes the math hyphenation tree to |f|. */
+static void tex_aux_dump_hmcodes(dumpstream f)
+{
+    sa_tree tree = lmt_luscode_state.hmcode_head;
+    sa_dump_tree(f, tree);
+}
+
+/*tex Reads a tree from |f| and makes it the math hyphenation tree; the old head is simply overwritten. */
+static void tex_aux_undump_hmcodes(dumpstream f)
+{
+    sa_tree tree = sa_undump_tree(f);
+    lmt_luscode_state.hmcode_head = tree;
+}
+
+/*tex
+    Destroys the math hyphenation tree. The head is reset afterwards so that no dangling pointer
+    is left behind, in line with what |tex_aux_free_catcodes| does with its arrays.
+*/
+static void tex_aux_free_hmcodes(void)
+{
+    sa_destroy_tree(lmt_luscode_state.hmcode_head);
+    lmt_luscode_state.hmcode_head = NULL;
+}
+
+/*tex
+
+ The hyphenation codes are indeed stored in a tree and are used instead of lowercase codes when
+ deciding what characters to take into account when hyphenating. They are bound to up to
+ |HJCODE_MAX| languages. In the end I decided to put the hash pointer in the language record
+ so that we can do better lean memory management. Actually, the hjcode handling already was more
+ efficient than in \LUATEX\ because I kept track of usage and allocated (dumped) only the
+ languages that were used. A typical example of nicely cleaned up code that in the end was
+ ditched but that happens often (and of course goes unnoticed). Actually, in \CONTEXT\ we don't
+ dump language info at all, so I might as well drop language dumping, just like fonts.
+
+*/
+
+/*tex The |sa_new_tree| stack argument and the default item value for the per-language hjcode trees. */
+# define HJCODESTACK 8
+# define HJCODEDEFAULT 0
+
+/*tex
+    Stores |v| as hyphenation code of character |n| for language |h|. A language number outside
+    |0 .. top| is silently ignored. The tree lives in the language record and is created on first
+    use; when that fails nothing is stored.
+*/
+void tex_set_hj_code(int h, int n, halfword v, int gl)
+{
+    if (h < 0 || h > lmt_language_state.language_data.top) {
+        return;
+    }
+    sa_tree tree = lmt_language_state.languages[h]->hjcode_head;
+    if (tree == NULL) {
+        sa_tree_item dflt = { .int_value = HJCODEDEFAULT };
+        tree = sa_new_tree(HJCODESTACK, 4, dflt);
+        lmt_language_state.languages[h]->hjcode_head = tree;
+    }
+    if (tree != NULL) {
+        sa_tree_item item = { .int_value = (int) v };
+        sa_set_item_4(tree, n, item, gl);
+    }
+}
+
+/*tex We just return the lccodes when nothing is set. */
+
+/*tex
+    Returns the hyphenation code of character |n| for language |h|. A language without its own
+    tree falls back on the global lowercase codes, and an out of range language number gives zero.
+*/
+halfword tex_get_hj_code(int h, int n)
+{
+    if (h < 0 || h > lmt_language_state.language_data.top) {
+        return 0;
+    }
+    sa_tree own = lmt_language_state.languages[h]->hjcode_head;
+    sa_tree tree = own ? own : lmt_luscode_state.lccode_head;
+    return sa_return_item_4(tree, n);
+}
+
+/*tex
+    Writes the hjcode tree of language |h| to |f|: a flag (1 or 0) that tells if a tree follows,
+    and then the tree itself when there is one. Nothing is written for an out of range language
+    number.
+*/
+void tex_dump_language_hj_codes(dumpstream f, int h)
+{
+    if (h < 0 || h > lmt_language_state.language_data.top) {
+        /* error */
+        return;
+    }
+    sa_tree tree = lmt_language_state.languages[h]->hjcode_head;
+    if (! tree) {
+        dump_via_int(f, 0);
+        return;
+    }
+    dump_via_int(f, 1);
+    sa_dump_tree(f, tree);
+}
+
+/*tex
+    Reads back what |tex_dump_language_hj_codes| wrote for language |h|: a flag, and when that is
+    non-zero a tree that replaces the current one. Nothing is read for an out of range language
+    number.
+
+    The tree that gets replaced is released with |sa_destroy_tree|, the same call that
+    |tex_hj_codes_from_lc_codes| uses for this field; it is a tree and not a plain array, so
+    |sa_free_array| is not the right way to get rid of it.
+*/
+void tex_undump_language_hj_codes(dumpstream f, int h)
+{
+    if (h >= 0 && h <= lmt_language_state.language_data.top) {
+        int x;
+        undump_int(f, x);
+        if (x) {
+            /*tex A |NULL| head is fine here; this file passes those to |sa_destroy_tree| elsewhere too. */
+            sa_destroy_tree(lmt_language_state.languages[h]->hjcode_head);
+            lmt_language_state.languages[h]->hjcode_head = sa_undump_tree(f);
+        } else {
+            lmt_language_state.languages[h]->hjcode_head = NULL;
+        }
+    } else {
+        /* error */
+    }
+}
+
+/*tex
+    Replaces the hjcode tree of language |h| by a copy of the global lowercase tree. The old tree,
+    if any, is destroyed first. An out of range language number is ignored.
+*/
+void tex_hj_codes_from_lc_codes(int h)
+{
+    if (h < 0 || h > lmt_language_state.language_data.top) {
+        return;
+    }
+    sa_tree old = lmt_language_state.languages[h]->hjcode_head;
+    if (old) {
+        sa_destroy_tree(old);
+    }
+    /*tex Whatever |sa_copy_tree| returns is stored, so a failed copy leaves the head |NULL|. */
+    lmt_language_state.languages[h]->hjcode_head = sa_copy_tree(lmt_luscode_state.lccode_head);
+}
+
+/*tex The public management functions. */
+
+/*tex
+    Calls the unsave helper of each of the five global code trees (lc, uc, sf, hc, hm) with
+    |grouplevel|. Catcode tables are not part of this; they have |tex_unsave_cat_codes|.
+*/
+void tex_unsave_text_codes(int grouplevel)
+{
+ tex_aux_unsave_lccodes(grouplevel);
+ tex_aux_unsave_uccodes(grouplevel);
+ tex_aux_unsave_sfcodes(grouplevel);
+ tex_aux_unsave_hccodes(grouplevel);
+ tex_aux_unsave_hmcodes(grouplevel);
+}
+
+/*tex
+    Creates the catcode state (with table zero) and the five global code trees. The hjcode trees
+    are not created here; they live in the language records and come into existence on demand.
+*/
+void tex_initialize_text_codes(void)
+{
+ tex_aux_initialize_catcodes();
+ tex_aux_initialize_lccodes();
+ tex_aux_initialize_uccodes();
+ tex_aux_initialize_sfcodes();
+ tex_aux_initialize_hccodes();
+ tex_aux_initialize_hmcodes();
+ /* initializehjcodes(); */
+}
+
+/*tex Releases the catcode tables and the five global code trees; hjcode trees are not touched. */
+void tex_free_text_codes(void)
+{
+ tex_aux_free_catcodes();
+ tex_aux_free_lccodes();
+ tex_aux_free_uccodes();
+ tex_aux_free_sfcodes();
+ tex_aux_free_hccodes();
+ tex_aux_free_hmcodes();
+ /* freehjcodes(); */
+}
+
+/*tex
+    Writes the catcode tables and the five global code trees to |f|, one after another. The
+    sequence (cat, lc, uc, sf, hc, hm) is the same one that |tex_undump_text_codes| reads in.
+*/
+void tex_dump_text_codes(dumpstream f)
+{
+ tex_aux_dump_catcodes(f);
+ tex_aux_dump_lccodes(f);
+ tex_aux_dump_uccodes(f);
+ tex_aux_dump_sfcodes(f);
+ tex_aux_dump_hccodes(f);
+ tex_aux_dump_hmcodes(f);
+ /* dumphjcodes(f); */
+}
+
+/*tex
+    Reads the catcode tables and the five global code trees back from |f|, in the sequence (cat,
+    lc, uc, sf, hc, hm) in which |tex_dump_text_codes| writes them.
+*/
+void tex_undump_text_codes(dumpstream f)
+{
+ tex_aux_undump_catcodes(f);
+ tex_aux_undump_lccodes(f);
+ tex_aux_undump_uccodes(f);
+ tex_aux_undump_sfcodes(f);
+ tex_aux_undump_hccodes(f);
+ tex_aux_undump_hmcodes(f);
+ /* undumphjcodes(f); */
+}
+
+/*tex
+    Gives the letters |A-Z| and |a-z| their lowercase and uppercase mappings and sets the space
+    factor of the uppercase letters to 999, all at |level_one|. The lowercase letter is found by
+    adding 32 to the uppercase one.
+*/
+void tex_initialize_xx_codes(void)
+{
+    /*tex We're compatible. */
+    for (int upper = 'A', lower = 'A' + 32; upper <= 'Z'; upper++, lower++) {
+        tex_set_lc_code(upper, lower, level_one);
+        tex_set_lc_code(lower, lower, level_one);
+        tex_set_uc_code(upper, upper, level_one);
+        tex_set_uc_code(lower, upper, level_one);
+        tex_set_sf_code(upper, 999, level_one);
+    }
+    /*tex A good start but not compatible. */
+    /* set_hc_code(0x002D, 0x002D, level_one); */
+    /* set_hc_code(0x2010, 0x2010, level_one); */
+} \ No newline at end of file