Diffstat (limited to 'source/luametatex/source/utilities/auxsparsearray.c')
-rw-r--r--  source/luametatex/source/utilities/auxsparsearray.c  623
1 file changed, 623 insertions(+), 0 deletions(-)
diff --git a/source/luametatex/source/utilities/auxsparsearray.c b/source/luametatex/source/utilities/auxsparsearray.c
new file mode 100644
index 000000000..d9fa5e453
--- /dev/null
+++ b/source/luametatex/source/utilities/auxsparsearray.c
@@ -0,0 +1,623 @@
+/*
+    See license.txt in the root of this project.
+*/
+
+/*tex
+
+    Here we implement sparse arrays with an embedded save stack. These functions are called very
+    often but a few days of experimenting proved that there is not much to gain (if at all) from
+    using macros or optimizations like preallocating and fast access to the first 128 entries. In
+    practice the overhead is mostly in accessing memory and not in (probably inlined) calls. So,
+    we should accept fate and wait for faster memory. It's the price we pay for being unicode on
+    the one hand and sparse on the other.
+
+*/
+
+# include "luametatex.h"
+
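+/*tex
+
+    For orientation: an item index |n| is split into a high, mid and low part by the
+    |LMT_SA_H_PART|, |LMT_SA_M_PART| and |LMT_SA_L_PART| macros from the header. A sketch of
+    that split, assuming seven bits per level (the real part sizes live in the header, not
+    here):
+
+*/
+
+/*
+int h = (n >> 14) & (LMT_SA_HIGHPART - 1); // picks the table of mid pointers
+int m = (n >>  7) & (LMT_SA_MIDPART  - 1); // picks the block of low items
+int l =  n        & (LMT_SA_LOWPART  - 1); // picks the item in that block
+*/
+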
+sparse_state_info lmt_sparse_state = {
+    .sparse_data = {
+        .minimum   = memory_data_unset,
+        .maximum   = memory_data_unset,
+        .size      = memory_data_unset,
+        .step      = memory_data_unset,
+        .allocated = 0,
+        .itemsize  = 1,
+        .top       = memory_data_unset,
+        .ptr       = memory_data_unset,
+        .initial   = memory_data_unset,
+        .offset    = 0,
+    }
+};
+
+void *sa_malloc_array(int recordsize, int size)
+{
+    int allocated = recordsize * size;
+    lmt_sparse_state.sparse_data.allocated += allocated;
+    return lmt_memory_malloc((size_t) allocated);
+}
+
+void *sa_realloc_array(void *p, int recordsize, int size, int step)
+{
+    int deallocated = recordsize * size;
+    int allocated = recordsize * (size + step);
+    lmt_sparse_state.sparse_data.allocated += (allocated - deallocated);
+    return lmt_memory_realloc(p, (size_t) allocated);
+}
+
+void *sa_calloc_array(int recordsize, int size)
+{
+    int allocated = recordsize * size;
+    lmt_sparse_state.sparse_data.allocated += allocated;
+    return lmt_memory_calloc((size_t) size, recordsize);
+}
+
+void sa_wipe_array(void *head, int recordsize, int size)
+{
+    memset(head, 0, recordsize * ((size_t) size));
+}
+
+void *sa_free_array(void *p)
+{
+    lmt_memory_free(p);
+    return NULL;
+}
+
+/*tex
+
+    Once we have two variants allocated we can dump and undump a |LOWPART| array in one go. But
+    not yet. Currently the waste of one extra dummy int is cheaper than multiple functions.
+
+*/
+
+static void sa_aux_store_stack(sa_tree a, int n, sa_tree_item v1, sa_tree_item v2, int gl)
+{
+    sa_stack_item st;
+    st.code = n;
+    st.value_1 = v1;
+    st.value_2 = v2;
+    st.level = gl;
+    if (! a->stack) {
+        a->stack = sa_malloc_array(sizeof(sa_stack_item), a->sa_stack_size);
+    } else if (((a->sa_stack_ptr) + 1) >= a->sa_stack_size) {
+        a->stack = sa_realloc_array(a->stack, sizeof(sa_stack_item), a->sa_stack_size, a->sa_stack_step);
+        a->sa_stack_size += a->sa_stack_step;
+    }
+    (a->sa_stack_ptr)++;
+    a->stack[a->sa_stack_ptr] = st;
+}
+
+static void sa_aux_skip_in_stack(sa_tree a, int n)
+{
+    if (a->stack) {
+        int p = a->sa_stack_ptr;
+        while (p > 0) {
+            if (a->stack[p].code == n && a->stack[p].level > 0) {
+                a->stack[p].level = -(a->stack[p].level);
+            }
+            p--;
+        }
+    }
+}
+
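+/*tex
+
+    Together these two helpers implement the embedded save stack: a grouped assignment pushes
+    the old value plus the group level, while a global one negates the level of pending entries
+    for the same code so that |sa_restore_stack| pops them without restoring. A sketch of the
+    intended flow (hypothetical tree and values, not part of this file):
+
+*/
+
+/*
+sa_set_item_4(t, 0x41, v_local, 2);  // pushes the old value at level 2
+sa_set_item_4(t, 0x41, v_global, 1); // global: the pending entry gets level -2
+sa_restore_stack(t, 2);              // pops the entry but keeps v_global
+*/
+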
+int sa_get_item_1(const sa_tree head, int n)
+{
+    if (head->tree) {
+        int h = LMT_SA_H_PART(n);
+        if (head->tree[h]) {
+            int m = LMT_SA_M_PART(n);
+            if (head->tree[h][m]) {
+                return head->tree[h][m][LMT_SA_L_PART(n)/4].uchar_value[n%4];
+            }
+        }
+    }
+    return (int) head->dflt.uchar_value[n%4];
+}
+
+int sa_get_item_2(const sa_tree head, int n)
+{
+    if (head->tree) {
+        int h = LMT_SA_H_PART(n);
+        if (head->tree[h]) {
+            int m = LMT_SA_M_PART(n);
+            if (head->tree[h][m]) {
+                return head->tree[h][m][LMT_SA_L_PART(n)/2].ushort_value[n%2];
+            }
+        }
+    }
+    return (int) head->dflt.ushort_value[n%2];
+}
+
+sa_tree_item sa_get_item_4(const sa_tree head, int n)
+{
+    if (head->tree) {
+        int h = LMT_SA_H_PART(n);
+        if (head->tree[h]) {
+            int m = LMT_SA_M_PART(n);
+            if (head->tree[h][m]) {
+                return head->tree[h][m][LMT_SA_L_PART(n)];
+            }
+        }
+    }
+    return head->dflt;
+}
+
+sa_tree_item sa_get_item_8(const sa_tree head, int n, sa_tree_item *v2)
+{
+    if (head->tree) {
+        int h = LMT_SA_H_PART(n);
+        if (head->tree[h]) {
+            int m = LMT_SA_M_PART(n);
+            if (head->tree[h][m]) {
+                int l = 2*LMT_SA_L_PART(n);
+                *v2 = head->tree[h][m][l+1];
+                return head->tree[h][m][l];
+            }
+        }
+    }
+    *v2 = head->dflt;
+    return head->dflt;
+}
+
+void sa_set_item_1(sa_tree head, int n, int v, int gl)
+{
+    int h = LMT_SA_H_PART(n);
+    int m = LMT_SA_M_PART(n);
+    int l = LMT_SA_L_PART(n);
+    if (! head->tree) {
+        head->tree = (sa_tree_item ***) sa_calloc_array(sizeof(sa_tree_item **), LMT_SA_HIGHPART);
+    }
+    if (! head->tree[h]) {
+        head->tree[h] = (sa_tree_item **) sa_calloc_array(sizeof(sa_tree_item *), LMT_SA_MIDPART);
+    }
+    if (! head->tree[h][m]) {
+        head->tree[h][m] = (sa_tree_item *) sa_malloc_array(sizeof(sa_tree_item), LMT_SA_LOWPART/4);
+        for (int i = 0; i < LMT_SA_LOWPART/4; i++) {
+            head->tree[h][m][i] = head->dflt;
+        }
+    }
+    if (gl <= 1) {
+        sa_aux_skip_in_stack(head, n);
+    } else {
+        sa_aux_store_stack(head, n, head->tree[h][m][l/4], (sa_tree_item) { 0 }, gl);
+    }
+    head->tree[h][m][l/4].uchar_value[n%4] = (unsigned char) v;
+}
+
+void sa_set_item_2(sa_tree head, int n, int v, int gl)
+{
+    int h = LMT_SA_H_PART(n);
+    int m = LMT_SA_M_PART(n);
+    int l = LMT_SA_L_PART(n);
+    if (! head->tree) {
+        head->tree = (sa_tree_item ***) sa_calloc_array(sizeof(sa_tree_item **), LMT_SA_HIGHPART);
+    }
+    if (! head->tree[h]) {
+        head->tree[h] = (sa_tree_item **) sa_calloc_array(sizeof(sa_tree_item *), LMT_SA_MIDPART);
+    }
+    if (! head->tree[h][m]) {
+        head->tree[h][m] = (sa_tree_item *) sa_malloc_array(sizeof(sa_tree_item), LMT_SA_LOWPART/2);
+        for (int i = 0; i < LMT_SA_LOWPART/2; i++) {
+            head->tree[h][m][i] = head->dflt;
+        }
+    }
+    if (gl <= 1) {
+        sa_aux_skip_in_stack(head, n);
+    } else {
+        sa_aux_store_stack(head, n, head->tree[h][m][l/2], (sa_tree_item) { 0 }, gl);
+    }
+    head->tree[h][m][l/2].ushort_value[n%2] = (unsigned short) v;
+}
+
+void sa_set_item_4(sa_tree head, int n, sa_tree_item v, int gl)
+{
+    int h = LMT_SA_H_PART(n);
+    int m = LMT_SA_M_PART(n);
+    int l = LMT_SA_L_PART(n);
+    if (! head->tree) {
+        head->tree = (sa_tree_item ***) sa_calloc_array(sizeof(sa_tree_item **), LMT_SA_HIGHPART);
+    }
+    if (! head->tree[h]) {
+        head->tree[h] = (sa_tree_item **) sa_calloc_array(sizeof(sa_tree_item *), LMT_SA_MIDPART);
+    }
+    if (! head->tree[h][m]) {
+        head->tree[h][m] = (sa_tree_item *) sa_malloc_array(sizeof(sa_tree_item), LMT_SA_LOWPART);
+        for (int i = 0; i < LMT_SA_LOWPART; i++) {
+            head->tree[h][m][i] = head->dflt;
+        }
+    }
+    if (gl <= 1) {
+        sa_aux_skip_in_stack(head, n);
+    } else {
+        sa_aux_store_stack(head, n, head->tree[h][m][l], (sa_tree_item) { 0 }, gl);
+    }
+    head->tree[h][m][l] = v;
+}
+
+void sa_set_item_8(sa_tree head, int n, sa_tree_item v1, sa_tree_item v2, int gl)
+{
+    int h = LMT_SA_H_PART(n);
+    int m = LMT_SA_M_PART(n);
+    int l = 2*LMT_SA_L_PART(n);
+    if (! head->tree) {
+        head->tree = (sa_tree_item ***) sa_calloc_array(sizeof(sa_tree_item **), LMT_SA_HIGHPART);
+    }
+    if (! head->tree[h]) {
+        head->tree[h] = (sa_tree_item **) sa_calloc_array(sizeof(sa_tree_item *), LMT_SA_MIDPART);
+    }
+    if (! head->tree[h][m]) {
+        head->tree[h][m] = (sa_tree_item *) sa_malloc_array(sizeof(sa_tree_item), 2 * LMT_SA_LOWPART);
+        for (int i = 0; i < 2 * LMT_SA_LOWPART; i++) {
+            head->tree[h][m][i] = head->dflt;
+        }
+    }
+    if (gl <= 1) {
+        sa_aux_skip_in_stack(head, n);
+    } else {
+        sa_aux_store_stack(head, n, head->tree[h][m][l], head->tree[h][m][l+1], gl);
+    }
+    head->tree[h][m][l] = v1;
+    head->tree[h][m][l+1] = v2;
+}
+
+void sa_set_item_n(sa_tree head, int n, int v, int gl)
+{
+    int h = LMT_SA_H_PART(n);
+    int m = LMT_SA_M_PART(n);
+    int l = LMT_SA_L_PART(n);
+    int d = head->bytes == 1 ? 4 : (head->bytes == 2 ? 2 : 1);
+    if (! head->tree) {
+        head->tree = (sa_tree_item ***) sa_calloc_array(sizeof(sa_tree_item **), LMT_SA_HIGHPART);
+    }
+    if (! head->tree[h]) {
+        head->tree[h] = (sa_tree_item **) sa_calloc_array(sizeof(sa_tree_item *), LMT_SA_MIDPART);
+    }
+    if (! head->tree[h][m]) {
+        head->tree[h][m] = (sa_tree_item *) sa_malloc_array(sizeof(sa_tree_item), LMT_SA_LOWPART/d);
+        for (int i = 0; i < LMT_SA_LOWPART/d; i++) {
+            head->tree[h][m][i] = head->dflt;
+        }
+    }
+    if (gl <= 1) {
+        sa_aux_skip_in_stack(head, n);
+    } else {
+        sa_aux_store_stack(head, n, head->tree[h][m][l/d], (sa_tree_item) { 0 }, gl);
+    }
+    switch (head->bytes) {
+        case 1:
+            head->tree[h][m][l/4].uchar_value[n%4] = (unsigned char) (v < 0 ? 0 : (v > 0xFF ? 0xFF : v));
+            break;
+        case 2:
+            /* The clamped value has to be stored as a short here, not as a char. */
+            head->tree[h][m][l/2].ushort_value[n%2] = (unsigned short) (v < 0 ? 0 : (v > 0xFFFF ? 0xFFFF : v));
+            break;
+        case 4:
+            head->tree[h][m][l].int_value = v;
+            break;
+    }
+}
+
+int sa_get_item_n(const sa_tree head, int n)
+{
+    if (head->tree) {
+        int h = LMT_SA_H_PART(n);
+        if (head->tree[h]) {
+            int m = LMT_SA_M_PART(n);
+            if (head->tree[h][m]) {
+                switch (head->bytes) {
+                    case 1: return (int) head->tree[h][m][LMT_SA_L_PART(n)/4].uchar_value[n%4];
+                    case 2: return (int) head->tree[h][m][LMT_SA_L_PART(n)/2].ushort_value[n%2];
+                    case 4: return (int) head->tree[h][m][LMT_SA_L_PART(n)  ].int_value;
+                }
+            }
+        }
+    }
+    switch (head->bytes) {
+        case 1: return (int) head->dflt.uchar_value[n%4];
+        case 2: return (int) head->dflt.ushort_value[n%2];
+        case 4: return (int) head->dflt.int_value;
+        default: return 0;
+    }
+}
+
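+/*tex
+
+    The |_1| and |_2| variants pack four bytes respectively two shorts into a single
+    |sa_tree_item|, which is what the |l/4| with |n%4| (and |l/2| with |n%2|) addressing above
+    amounts to. Schematically, assuming the item is the four byte union that the accessors
+    suggest:
+
+*/
+
+/*
+sa_tree_item item = tree[h][m][l/4];                   // one item holds values for n .. n+3
+unsigned char b = item.uchar_value[n%4];               // the byte that belongs to n
+unsigned short s = tree[h][m][l/2].ushort_value[n%2];  // two byte case: two values per item
+*/
+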
+/*
+void rawset_sa_item_4(sa_tree head, int n, sa_tree_item v)
+{
+    head->tree[LMT_SA_H_PART(n)][LMT_SA_M_PART(n)][LMT_SA_L_PART(n)] = v;
+}
+*/
+
+void sa_clear_stack(sa_tree a)
+{
+    if (a) {
+        a->stack = sa_free_array(a->stack);
+        a->sa_stack_ptr = 0;
+        a->sa_stack_size = a->sa_stack_step;
+    }
+}
+
+void sa_destroy_tree(sa_tree a)
+{
+    if (a) {
+        if (a->tree) {
+            for (int h = 0; h < LMT_SA_HIGHPART; h++) {
+                if (a->tree[h]) {
+                    for (int m = 0; m < LMT_SA_MIDPART; m++) {
+                        a->tree[h][m] = sa_free_array(a->tree[h][m]);
+                    }
+                    a->tree[h] = sa_free_array(a->tree[h]);
+                }
+            }
+            a->tree = sa_free_array(a->tree);
+        }
+        a->stack = sa_free_array(a->stack);
+        a = sa_free_array(a);
+    }
+}
+
+sa_tree sa_copy_tree(sa_tree b)
+{
+    sa_tree a = (sa_tree) sa_malloc_array(sizeof(sa_tree_head), 1);
+    a->sa_stack_step = b->sa_stack_step;
+    a->sa_stack_size = b->sa_stack_size;
+    a->bytes = b->bytes;
+    a->dflt = b->dflt;
+    a->stack = NULL;
+    a->sa_stack_ptr = 0;
+    a->tree = NULL;
+    if (b->tree) {
+        a->tree = (sa_tree_item ***) sa_calloc_array(sizeof(void *), LMT_SA_HIGHPART);
+        for (int h = 0; h < LMT_SA_HIGHPART; h++) {
+            if (b->tree[h]) {
+                int slide = LMT_SA_LOWPART;
+                switch (b->bytes) {
+                    case 1: slide =   LMT_SA_LOWPART/4; break;
+                    case 2: slide =   LMT_SA_LOWPART/2; break;
+                    case 4: slide =   LMT_SA_LOWPART  ; break;
+                    case 8: slide = 2*LMT_SA_LOWPART  ; break;
+                }
+                a->tree[h] = (sa_tree_item **) sa_calloc_array(sizeof(void *), LMT_SA_MIDPART);
+                for (int m = 0; m < LMT_SA_MIDPART; m++) {
+                    if (b->tree[h][m]) {
+                        a->tree[h][m] = sa_malloc_array(sizeof(sa_tree_item), slide);
+                        memcpy(a->tree[h][m], b->tree[h][m], sizeof(sa_tree_item) * slide);
+                    }
+                }
+            }
+        }
+    }
+    return a;
+}
+
+/*tex
+
+    The main reason to fill in the lowest entry branches here immediately is that most of the
+    sparse arrays have a bias toward \ASCII\ values. Allocating those up front improves the
+    chance of the structure |a->tree[0][0][x]| ending up close together in actual memory
+    locations. We could save less for type 0 stacks.
+
+*/
+
+sa_tree sa_new_tree(int size, int bytes, sa_tree_item dflt)
+{
+    sa_tree_head *a = (sa_tree_head *) lmt_memory_malloc(sizeof(sa_tree_head));
+    a->dflt = dflt;
+    a->stack = NULL;
+    a->tree = (sa_tree_item ***) sa_calloc_array(sizeof(sa_tree_item **), LMT_SA_HIGHPART);
+    a->tree[0] = (sa_tree_item **) sa_calloc_array(sizeof(sa_tree_item *), LMT_SA_MIDPART);
+    a->sa_stack_size = size;
+    a->sa_stack_step = size;
+    a->bytes = bytes;
+    a->sa_stack_ptr = 0;
+    return (sa_tree) a;
+}
+
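+/*tex
+
+    A typical lifecycle, as a sketch with made up values: a tree with four byte items, a zero
+    default and a modest save stack. Thanks to the preallocated |tree[0]| the \ASCII\ range
+    only needs one more block before the first assignment can be stored.
+
+*/
+
+/*
+sa_tree_item zero = { 0 };
+sa_tree t = sa_new_tree(16, 4, zero);                           // stack size 16, 4 byte values
+sa_set_item_4(t, 0x41, (sa_tree_item) { .int_value = 123 }, 1); // a global assignment
+int v = sa_get_item_4(t, 0x41).int_value;                       // 123; unset codes give dflt
+sa_destroy_tree(t);
+*/
+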
+void sa_restore_stack(sa_tree head, int gl)
+{
+    if (head->stack) {
+        while (head->sa_stack_ptr > 0 && abs(head->stack[head->sa_stack_ptr].level) >= gl) {
+            sa_stack_item st = head->stack[head->sa_stack_ptr];
+            if (st.level > 0) {
+                int code = st.code;
+                switch (head->bytes) {
+                    case 1:
+                        {
+                            int c = code % 4;
+                            head->tree[LMT_SA_H_PART(code)][LMT_SA_M_PART(code)][LMT_SA_L_PART(code)/4].uchar_value[c] = st.value_1.uchar_value[c];
+                        }
+                        break;
+                    case 2:
+                        {
+                            int c = code % 2;
+                            head->tree[LMT_SA_H_PART(code)][LMT_SA_M_PART(code)][LMT_SA_L_PART(code)/2].ushort_value[c] = st.value_1.ushort_value[c];
+                        }
+                        break;
+                    case 4:
+                        {
+                            head->tree[LMT_SA_H_PART(code)][LMT_SA_M_PART(code)][LMT_SA_L_PART(code)] = st.value_1;
+                        }
+                        break;
+                    case 8:
+                        {
+                            int l = 2*LMT_SA_L_PART(code);
+                            head->tree[LMT_SA_H_PART(code)][LMT_SA_M_PART(code)][l]   = st.value_1;
+                            head->tree[LMT_SA_H_PART(code)][LMT_SA_M_PART(code)][l+1] = st.value_2;
+                        }
+                        break;
+                }
+            }
+            (head->sa_stack_ptr)--;
+        }
+    }
+}
+
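+/*tex
+
+    The restore loop pops every entry saved at the current group level or deeper, so unwinding
+    nested groups is just a matter of calling it with decreasing levels. A sketch (hypothetical
+    driver code, |c|, |v1| and |v2| made up):
+
+*/
+
+/*
+sa_set_item_4(t, c, v1, 2); // group level 2 entered: the old value is pushed
+sa_set_item_4(t, c, v2, 3); // deeper group: v1 is pushed
+sa_restore_stack(t, 3);     // back to level 2: c holds v1 again
+sa_restore_stack(t, 2);     // back to level 1: c holds the original value
+*/
+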
+void sa_dump_tree(dumpstream f, sa_tree a)
+{
+    dump_int(f, a->sa_stack_step);
+    dump_int(f, a->dflt.int_value);
+    if (a->tree) {
+        int bytes = a->bytes;
+        /*tex A marker: */
+        dump_via_int(f, 1);
+        dump_int(f, bytes);
+        for (int h = 0; h < LMT_SA_HIGHPART; h++) {
+            if (a->tree[h]) {
+                dump_via_int(f, 1);
+                for (int m = 0; m < LMT_SA_MIDPART; m++) {
+                    if (a->tree[h][m]) {
+                        /*tex
+                            It happens a lot that the value is the same as the index, for
+                            instance with case mappings.
+
+                            Using mode 3 for the case where all values are the default value
+                            saves in \CONTEXT\ some 128 * 5 dumps, which is not worth the
+                            trouble, but it is neat anyway.
+
+                            1 : values are kind of unique
+                            2 : for all values : value == self
+                            3 : for all values : value == default
+
+                            Actually, we could decide not to save at all in the third mode
+                            because unset equals default.
+                        */
+                        int mode = 1;
+                        if (bytes != 8) {
+                            /*tex Check for default values. */
+                            int slide = bytes == 1 ? LMT_SA_LOWPART/4 : (bytes == 2 ? LMT_SA_LOWPART/2 : LMT_SA_LOWPART);
+                            mode = 3;
+                            for (int l = 0; l < slide; l++) {
+                                if (a->tree[h][m][l].uint_value != a->dflt.uint_value) {
+                                    mode = 1;
+                                    break;
+                                }
+                            }
+                        }
+                        if (mode == 1 && bytes == 4) {
+                            /*tex Check for identity values, |hm| being the first index in this block. */
+                            unsigned int hm = (h * LMT_SA_MIDPART + m) * LMT_SA_LOWPART;
+                            mode = 2;
+                            for (int l = 0; l < LMT_SA_LOWPART; l++) {
+                                if (a->tree[h][m][l].uint_value == hm) {
+                                    hm++;
+                                } else {
+                                    mode = 1;
+                                    break;
+                                }
+                            }
+                        }
+                        dump_int(f, mode);
+                        if (mode == 1) {
+                            /*tex
+                                We have unique values. By avoiding this branch we save some 85 Kb
+                                on the \CONTEXT\ format. We could actually save this property in
+                                the tree but there is not that much to gain.
+                            */
+                            int slide = LMT_SA_LOWPART;
+                            switch (bytes) {
+                                case 1: slide =   LMT_SA_LOWPART/4; break;
+                                case 2: slide =   LMT_SA_LOWPART/2; break;
+                                case 4: slide =   LMT_SA_LOWPART  ; break;
+                                case 8: slide = 2*LMT_SA_LOWPART  ; break;
+                            }
+                            dump_items(f, &a->tree[h][m][0], sizeof(sa_tree_item), slide);
+                        } else {
+                            /*tex We have self values or defaults, so there is nothing to dump. */
+                        }
+                    } else {
+                        dump_via_int(f, 0);
+                    }
+                }
+            } else {
+                dump_via_int(f, 0);
+            }
+        }
+    } else {
+        /*tex A marker: */
+        dump_via_int(f, 0);
+    }
+}
+
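+/*tex
+
+    For reference, the byte stream that the dumper above produces, reconstructed from the code
+    (every scalar goes through |dump_int|):
+
+*/
+
+/*
+sa_stack_step
+dflt.int_value
+marker            0 = no tree, 1 = tree follows
+bytes             only when the tree marker is 1
+per high slot:    0 = empty, 1 = mid table follows
+  per mid slot:   mode 0 = empty, 1 = raw items, 2 = identity, 3 = all default
+    items[slide]  mode 1 only
+*/
+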
+sa_tree sa_undump_tree(dumpstream f)
+{
+    int x;
+    sa_tree a = (sa_tree) sa_malloc_array(sizeof(sa_tree_head), 1);
+    undump_int(f, a->sa_stack_step);
+    undump_int(f, a->dflt.int_value);
+    a->sa_stack_size = a->sa_stack_step;
+    a->stack = sa_calloc_array(sizeof(sa_stack_item), a->sa_stack_size);
+    a->sa_stack_ptr = 0;
+    a->tree = NULL;
+    /*tex The marker: */
+    undump_int(f, x);
+    if (x != 0) {
+        int bytes, mode;
+        a->tree = (sa_tree_item ***) sa_calloc_array(sizeof(void *), LMT_SA_HIGHPART);
+        undump_int(f, bytes);
+        a->bytes = bytes;
+        for (int h = 0; h < LMT_SA_HIGHPART; h++) {
+            undump_int(f, mode); /* more of a trigger than a mode */
+            if (mode > 0) {
+                a->tree[h] = (sa_tree_item **) sa_calloc_array(sizeof(void *), LMT_SA_MIDPART);
+                for (int m = 0; m < LMT_SA_MIDPART; m++) {
+                    undump_int(f, mode);
+                    switch (mode) {
+                        case 1:
+                            /*tex We have unique values. */
+                            {
+                                int slide = LMT_SA_LOWPART;
+                                switch (bytes) {
+                                    case 1: slide =   LMT_SA_LOWPART/4; break;
+                                    case 2: slide =   LMT_SA_LOWPART/2; break;
+                                    case 4: slide =   LMT_SA_LOWPART  ; break;
+                                    case 8: slide = 2*LMT_SA_LOWPART  ; break;
+                                }
+                                a->tree[h][m] = sa_malloc_array(sizeof(sa_tree_item), slide);
+                                undump_items(f, &a->tree[h][m][0], sizeof(sa_tree_item), slide);
+                            }
+                            break;
+                        case 2:
+                            /*tex
+                                We have self values. We only have this when we have integers.
+                                Other cases are math anyway, so not much to gain.
+                            */
+                            {
+                                if (bytes == 4) {
+                                    /*tex The same first index as used in the dumper. */
+                                    int hm = (h * LMT_SA_MIDPART + m) * LMT_SA_LOWPART;
+                                    a->tree[h][m] = sa_malloc_array(sizeof(sa_tree_item), LMT_SA_LOWPART);
+                                    for (int l = 0; l < LMT_SA_LOWPART; l++) {
+                                        a->tree[h][m][l].int_value = hm;
+                                        hm++;
+                                    }
+                                } else {
+                                    printf("\nfatal format error, mode %i, bytes %i\n", mode, bytes);
+                                }
+                            }
+                            break;
+                        case 3:
+                            /*tex
+                                We have all default values, so there is no need to set them.
+                            */
+                            break;
+                        default:
+                            /*tex We have no values set. */
+                            break;
+                    }
+                }
+            }
+        }
+    }
+    return a;
+}