author | Hans Hagen <pragma@wxs.nl> | 2022-09-16 15:53:42 +0200
---|---|---
committer | Context Git Mirror Bot <phg@phi-gamma.net> | 2022-09-16 15:53:42 +0200
commit | c161b7d6fe142231346cc1844e6e27c0ab7718c1 (patch) |
tree | 3fd877b8986137703e987e4651a2db8e946a0f72 /source/luametatex/source/utilities/auxsparsearray.c |
parent | e94fa4dc30ec28a6727aa85e17aaac18b76aeadb (diff) |
download | context-c161b7d6fe142231346cc1844e6e27c0ab7718c1.tar.gz |
Diffstat (limited to 'source/luametatex/source/utilities/auxsparsearray.c')
-rw-r--r-- | source/luametatex/source/utilities/auxsparsearray.c | 623 |
1 file changed, 623 insertions(+), 0 deletions(-)
diff --git a/source/luametatex/source/utilities/auxsparsearray.c b/source/luametatex/source/utilities/auxsparsearray.c
new file mode 100644
index 000000000..d9fa5e453
--- /dev/null
+++ b/source/luametatex/source/utilities/auxsparsearray.c
@@ -0,0 +1,623 @@
+/*
+    See license.txt in the root of this project.
+*/
+
+/*tex
+
+    Here we implement sparse arrays with an embedded save stack. These functions are called very
+    often but a few days of experimenting proved that there is not much to gain (if at all) from
+    using macros or optimizations like preallocating and fast access to the first 128 entries. In
+    practice the overhead is mostly in accessing memory and not in (probably inlined) calls. So, we
+    should accept fate and wait for faster memory. It's the price we pay for being unicode on the
+    one hand and sparse on the other.
+
+*/
+
+# include "luametatex.h"
+
+sparse_state_info lmt_sparse_state = {
+    .sparse_data = {
+        .minimum   = memory_data_unset,
+        .maximum   = memory_data_unset,
+        .size      = memory_data_unset,
+        .step      = memory_data_unset,
+        .allocated = 0,
+        .itemsize  = 1,
+        .top       = memory_data_unset,
+        .ptr       = memory_data_unset,
+        .initial   = memory_data_unset,
+        .offset    = 0,
+    }
+};
+
+void *sa_malloc_array(int recordsize, int size)
+{
+    int allocated = recordsize * size;
+    lmt_sparse_state.sparse_data.allocated += allocated;
+    return lmt_memory_malloc((size_t) allocated);
+}
+
+void *sa_realloc_array(void *p, int recordsize, int size, int step)
+{
+    int deallocated = recordsize * size;
+    int allocated = recordsize * (size + step);
+    lmt_sparse_state.sparse_data.allocated += (allocated - deallocated);
+    return lmt_memory_realloc(p, (size_t) allocated);
+}
+
+void *sa_calloc_array(int recordsize, int size)
+{
+    int allocated = recordsize * size;
+    lmt_sparse_state.sparse_data.allocated += allocated;
+    return lmt_memory_calloc((size_t) size, recordsize);
+}
+
+void sa_wipe_array(void *head, int recordsize, int size)
+{
+    memset(head, 0, recordsize * ((size_t) size));
+}
+
+void *sa_free_array(void *p)
+{
+    lmt_memory_free(p);
+    return NULL;
+}
+
+/*tex
+
+    Once we have two variants allocated we can dump and undump a |LOWPART| array in one go. But
+    not yet. Currently the waste of one extra dummy int is cheaper than multiple functions.
+
+*/
+
+static void sa_aux_store_stack(sa_tree a, int n, sa_tree_item v1, sa_tree_item v2, int gl)
+{
+    sa_stack_item st;
+    st.code = n;
+    st.value_1 = v1;
+    st.value_2 = v2;
+    st.level = gl;
+    if (! a->stack) {
+        a->stack = sa_malloc_array(sizeof(sa_stack_item), a->sa_stack_size);
+    } else if (((a->sa_stack_ptr) + 1) >= a->sa_stack_size) {
+        a->stack = sa_realloc_array(a->stack, sizeof(sa_stack_item), a->sa_stack_size, a->sa_stack_step);
+        a->sa_stack_size += a->sa_stack_step;
+    }
+    (a->sa_stack_ptr)++;
+    a->stack[a->sa_stack_ptr] = st;
+}
+
+static void sa_aux_skip_in_stack(sa_tree a, int n)
+{
+    if (a->stack) {
+        int p = a->sa_stack_ptr;
+        while (p > 0) {
+            if (a->stack[p].code == n && a->stack[p].level > 0) {
+                a->stack[p].level = -(a->stack[p].level);
+            }
+            p--;
+        }
+    }
+}
+
+int sa_get_item_1(const sa_tree head, int n)
+{
+    if (head->tree) {
+        int h = LMT_SA_H_PART(n);
+        if (head->tree[h]) {
+            int m = LMT_SA_M_PART(n);
+            if (head->tree[h][m]) {
+                return head->tree[h][m][LMT_SA_L_PART(n)/4].uchar_value[n%4];
+            }
+        }
+    }
+    return (int) head->dflt.uchar_value[n%4];
+}
+
+int sa_get_item_2(const sa_tree head, int n)
+{
+    if (head->tree) {
+        int h = LMT_SA_H_PART(n);
+        if (head->tree[h]) {
+            int m = LMT_SA_M_PART(n);
+            if (head->tree[h][m]) {
+                return head->tree[h][m][LMT_SA_L_PART(n)/2].ushort_value[n%2];
+            }
+        }
+    }
+    return (int) head->dflt.ushort_value[n%2];
+}
+
+sa_tree_item sa_get_item_4(const sa_tree head, int n)
+{
+    if (head->tree) {
+        int h = LMT_SA_H_PART(n);
+        if (head->tree[h]) {
+            int m = LMT_SA_M_PART(n);
+            if (head->tree[h][m]) {
+                return head->tree[h][m][LMT_SA_L_PART(n)];
+            }
+        }
+    }
+    return head->dflt;
+}
+
+sa_tree_item sa_get_item_8(const sa_tree head, int n, sa_tree_item *v2)
+{
+    if (head->tree != NULL) {
+        int h = LMT_SA_H_PART(n);
+        if (head->tree[h]) {
+            int m = LMT_SA_M_PART(n);
+            if (head->tree[h][m]) {
+                int l = 2*LMT_SA_L_PART(n);
+                *v2 = head->tree[h][m][l+1];
+                return head->tree[h][m][l];
+            }
+        }
+    }
+    *v2 = head->dflt;
+    return head->dflt;
+}
+
+void sa_set_item_1(sa_tree head, int n, int v, int gl)
+{
+    int h = LMT_SA_H_PART(n);
+    int m = LMT_SA_M_PART(n);
+    int l = LMT_SA_L_PART(n);
+    if (! head->tree) {
+        head->tree = (sa_tree_item ***) sa_calloc_array(sizeof(sa_tree_item **), LMT_SA_HIGHPART);
+    }
+    if (! head->tree[h]) {
+        head->tree[h] = (sa_tree_item **) sa_calloc_array(sizeof(sa_tree_item *), LMT_SA_MIDPART);
+    }
+    if (! head->tree[h][m]) {
+        head->tree[h][m] = (sa_tree_item *) sa_malloc_array(sizeof(sa_tree_item), LMT_SA_LOWPART/4);
+        for (int i = 0; i < LMT_SA_LOWPART/4; i++) {
+            head->tree[h][m][i] = head->dflt;
+        }
+    }
+    if (gl <= 1) {
+        sa_aux_skip_in_stack(head, n);
+    } else {
+        sa_aux_store_stack(head, n, head->tree[h][m][l/4], (sa_tree_item) { 0 }, gl);
+    }
+    head->tree[h][m][l/4].uchar_value[n%4] = (unsigned char) v;
+}
+
+void sa_set_item_2(sa_tree head, int n, int v, int gl)
+{
+    int h = LMT_SA_H_PART(n);
+    int m = LMT_SA_M_PART(n);
+    int l = LMT_SA_L_PART(n);
+    if (! head->tree) {
+        head->tree = (sa_tree_item ***) sa_calloc_array(sizeof(sa_tree_item **), LMT_SA_HIGHPART);
+    }
+    if (! head->tree[h]) {
+        head->tree[h] = (sa_tree_item **) sa_calloc_array(sizeof(sa_tree_item *), LMT_SA_MIDPART);
+    }
+    if (! head->tree[h][m]) {
+        head->tree[h][m] = (sa_tree_item *) sa_malloc_array(sizeof(sa_tree_item), LMT_SA_LOWPART/2);
+        for (int i = 0; i < LMT_SA_LOWPART/2; i++) {
+            head->tree[h][m][i] = head->dflt;
+        }
+    }
+    if (gl <= 1) {
+        sa_aux_skip_in_stack(head, n);
+    } else {
+        sa_aux_store_stack(head, n, head->tree[h][m][l/2], (sa_tree_item) { 0 }, gl);
+    }
+    head->tree[h][m][l/2].ushort_value[n%2] = (unsigned short) v;
+}
+
+void sa_set_item_4(sa_tree head, int n, sa_tree_item v, int gl)
+{
+    int h = LMT_SA_H_PART(n);
+    int m = LMT_SA_M_PART(n);
+    int l = LMT_SA_L_PART(n);
+    if (! head->tree) {
+        head->tree = (sa_tree_item ***) sa_calloc_array(sizeof(sa_tree_item **), LMT_SA_HIGHPART);
+    }
+    if (! head->tree[h]) {
+        head->tree[h] = (sa_tree_item **) sa_calloc_array(sizeof(sa_tree_item *), LMT_SA_MIDPART);
+    }
+    if (! head->tree[h][m]) {
+        head->tree[h][m] = (sa_tree_item *) sa_malloc_array(sizeof(sa_tree_item), LMT_SA_LOWPART);
+        for (int i = 0; i < LMT_SA_LOWPART; i++) {
+            head->tree[h][m][i] = head->dflt;
+        }
+    }
+    if (gl <= 1) {
+        sa_aux_skip_in_stack(head, n);
+    } else {
+        sa_aux_store_stack(head, n, head->tree[h][m][l], (sa_tree_item) { 0 }, gl);
+    }
+    head->tree[h][m][l] = v;
+}
+
+void sa_set_item_8(sa_tree head, int n, sa_tree_item v1, sa_tree_item v2, int gl)
+{
+    int h = LMT_SA_H_PART(n);
+    int m = LMT_SA_M_PART(n);
+    int l = 2*LMT_SA_L_PART(n);
+    if (! head->tree) {
+        head->tree = (sa_tree_item ***) sa_calloc_array(sizeof(sa_tree_item **), LMT_SA_HIGHPART);
+    }
+    if (! head->tree[h]) {
+        head->tree[h] = (sa_tree_item **) sa_calloc_array(sizeof(sa_tree_item *), LMT_SA_MIDPART);
+    }
+    if (! head->tree[h][m]) {
+        head->tree[h][m] = (sa_tree_item *) sa_malloc_array(sizeof(sa_tree_item), 2 * LMT_SA_LOWPART);
+        for (int i = 0; i < 2 * LMT_SA_LOWPART; i++) {
+            head->tree[h][m][i] = head->dflt;
+        }
+    }
+    if (gl <= 1) {
+        sa_aux_skip_in_stack(head, n);
+    } else {
+        sa_aux_store_stack(head, n, head->tree[h][m][l], head->tree[h][m][l+1], gl);
+    }
+    head->tree[h][m][l] = v1;
+    head->tree[h][m][l+1] = v2;
+}
+
+void sa_set_item_n(sa_tree head, int n, int v, int gl)
+{
+    int h = LMT_SA_H_PART(n);
+    int m = LMT_SA_M_PART(n);
+    int l = LMT_SA_L_PART(n);
+    int d = head->bytes == 1 ? 4 : (head->bytes == 2 ? 2 : 1);
+    if (! head->tree) {
+        head->tree = (sa_tree_item ***) sa_calloc_array(sizeof(sa_tree_item **), LMT_SA_HIGHPART);
+    }
+    if (! head->tree[h]) {
+        head->tree[h] = (sa_tree_item **) sa_calloc_array(sizeof(sa_tree_item *), LMT_SA_MIDPART);
+    }
+    if (! head->tree[h][m]) {
+        head->tree[h][m] = (sa_tree_item *) sa_malloc_array(sizeof(sa_tree_item), LMT_SA_LOWPART/d);
+        for (int i = 0; i < LMT_SA_LOWPART/d; i++) {
+            head->tree[h][m][i] = head->dflt;
+        }
+    }
+    if (gl <= 1) {
+        sa_aux_skip_in_stack(head, n);
+    } else {
+        sa_aux_store_stack(head, n, head->tree[h][m][l/d], (sa_tree_item) { 0 }, gl);
+    }
+    switch (head->bytes) {
+        case 1:
+            {
+                head->tree[h][m][l/4].uchar_value[n%4] = (unsigned char) (v < 0 ? 0 : (v > 0xFF ? 0xFF : v));
+                break;
+            }
+        case 2:
+            {
+                head->tree[h][m][l/2].ushort_value[n%2] = (unsigned short) (v < 0 ? 0 : (v > 0xFFFF ? 0xFFFF : v));
+                break;
+            }
+        case 4:
+            {
+                head->tree[h][m][l].int_value = v;
+                break;
+            }
+    }
+}
+
+int sa_get_item_n(const sa_tree head, int n)
+{
+    if (head->tree) {
+        int h = LMT_SA_H_PART(n);
+        if (head->tree[h]) {
+            int m = LMT_SA_M_PART(n);
+            if (head->tree[h][m]) {
+                switch (head->bytes) {
+                    case 1 : return (int) head->tree[h][m][LMT_SA_L_PART(n)/4].uchar_value[n%4];
+                    case 2 : return (int) head->tree[h][m][LMT_SA_L_PART(n)/2].ushort_value[n%2];
+                    case 4 : return (int) head->tree[h][m][LMT_SA_L_PART(n)  ].int_value;
+                }
+            }
+        }
+    }
+    switch (head->bytes) {
+        case 1 : return (int) head->dflt.uchar_value[n%4];
+        case 2 : return (int) head->dflt.ushort_value[n%2];
+        case 4 : return (int) head->dflt.int_value;
+        default: return 0;
+    }
+}
+
+/*
+void rawset_sa_item_4(sa_tree head, int n, sa_tree_item v)
+{
+    head->tree[LMT_SA_H_PART(n)][LMT_SA_M_PART(n)][LMT_SA_L_PART(n)] = v;
+}
+*/
+
+void sa_clear_stack(sa_tree a)
+{
+    if (a) {
+        a->stack = sa_free_array(a->stack);
+        a->sa_stack_ptr = 0;
+        a->sa_stack_size = a->sa_stack_step;
+    }
+}
+
+void sa_destroy_tree(sa_tree a)
+{
+    if (a) {
+        if (a->tree) {
+            for (int h = 0; h < LMT_SA_HIGHPART; h++) {
+                if (a->tree[h]) {
+                    for (int m = 0; m < LMT_SA_MIDPART; m++) {
+                        a->tree[h][m] = sa_free_array(a->tree[h][m]);
+                    }
+                    a->tree[h] = sa_free_array(a->tree[h]);
+                }
+            }
+            a->tree = sa_free_array(a->tree);
+        }
+        a->stack = sa_free_array(a->stack);
+        a = sa_free_array(a);
+    }
+}
+
+sa_tree sa_copy_tree(sa_tree b)
+{
+    sa_tree a = (sa_tree) sa_malloc_array(sizeof(sa_tree_head), 1);
+    a->sa_stack_step = b->sa_stack_step;
+    a->sa_stack_size = b->sa_stack_size;
+    a->bytes = b->bytes;
+    a->dflt = b->dflt;
+    a->stack = NULL;
+    a->sa_stack_ptr = 0;
+    a->tree = NULL;
+    if (b->tree) {
+        a->tree = (sa_tree_item ***) sa_calloc_array(sizeof(void *), LMT_SA_HIGHPART);
+        for (int h = 0; h < LMT_SA_HIGHPART; h++) {
+            if (b->tree[h]) {
+                int slide = LMT_SA_LOWPART;
+                switch (b->bytes) {
+                    case 1: slide =   LMT_SA_LOWPART/4; break;
+                    case 2: slide =   LMT_SA_LOWPART/2; break;
+                    case 4: slide =   LMT_SA_LOWPART  ; break;
+                    case 8: slide = 2*LMT_SA_LOWPART  ; break;
+                }
+                a->tree[h] = (sa_tree_item **) sa_calloc_array(sizeof(void *), LMT_SA_MIDPART);
+                for (int m = 0; m < LMT_SA_MIDPART; m++) {
+                    if (b->tree[h][m]) {
+                        a->tree[h][m] = sa_malloc_array(sizeof(sa_tree_item), slide);
+                        memcpy(a->tree[h][m], b->tree[h][m], sizeof(sa_tree_item) * slide);
+                    }
+                }
+            }
+        }
+    }
+    return a;
+}
+
+/*tex
+
+    The main reason to fill in the lowest entry branches here immediately is that most of the
+    sparse arrays have a bias toward \ASCII\ values. Allocating those here immediately improves
+    the chance of the structure |a->tree[0][0][x]| being close together in actual memory
+    locations. We could save less for type 0 stacks.
+
+*/
+
+sa_tree sa_new_tree(int size, int bytes, sa_tree_item dflt)
+{
+    sa_tree_head *a;
+    a = (sa_tree_head *) lmt_memory_malloc(sizeof(sa_tree_head));
+    a->dflt = dflt;
+    a->stack = NULL;
+    a->tree = (sa_tree_item ***) sa_calloc_array(sizeof(sa_tree_item **), LMT_SA_HIGHPART);
+    a->tree[0] = (sa_tree_item **) sa_calloc_array(sizeof(sa_tree_item *), LMT_SA_MIDPART);
+    a->sa_stack_size = size;
+    a->sa_stack_step = size;
+    a->bytes = bytes;
+    a->sa_stack_ptr = 0;
+    return (sa_tree) a;
+}
+
+void sa_restore_stack(sa_tree head, int gl)
+{
+    if (head->stack) {
+        sa_stack_item st;
+        while (head->sa_stack_ptr > 0 && abs(head->stack[head->sa_stack_ptr].level) >= gl) {
+            st = head->stack[head->sa_stack_ptr];
+            if (st.level > 0) {
+                int code = st.code;
+                switch (head->bytes) {
+                    case 1:
+                        {
+                            int c = code % 4;
+                            head->tree[LMT_SA_H_PART(code)][LMT_SA_M_PART(code)][LMT_SA_L_PART(code)/4].uchar_value[c] = st.value_1.uchar_value[c];
+                        }
+                        break;
+                    case 2:
+                        {
+                            int c = code % 2;
+                            head->tree[LMT_SA_H_PART(code)][LMT_SA_M_PART(code)][LMT_SA_L_PART(code)/2].ushort_value[c] = st.value_1.ushort_value[c];
+                        }
+                        break;
+                    case 4:
+                        {
+                            head->tree[LMT_SA_H_PART(code)][LMT_SA_M_PART(code)][LMT_SA_L_PART(code)] = st.value_1;
+                        }
+                        break;
+                    case 8:
+                        {
+                            int l = 2*LMT_SA_L_PART(code);
+                            head->tree[LMT_SA_H_PART(code)][LMT_SA_M_PART(code)][l] = st.value_1;
+                            head->tree[LMT_SA_H_PART(code)][LMT_SA_M_PART(code)][l+1] = st.value_2;
+                        }
+                        break;
+                }
+            }
+            (head->sa_stack_ptr)--;
+        }
+    }
+}
+
+void sa_dump_tree(dumpstream f, sa_tree a)
+{
+    dump_int(f, a->sa_stack_step);
+    dump_int(f, a->dflt.int_value);
+    if (a->tree) {
+        int bytes = a->bytes;
+        /*tex A marker: */
+        dump_via_int(f, 1);
+        dump_int(f, bytes);
+        for (int h = 0; h < LMT_SA_HIGHPART; h++) {
+            if (a->tree[h]) {
+                dump_via_int(f, 1);
+                for (int m = 0; m < LMT_SA_MIDPART; m++) {
+                    if (a->tree[h][m]) {
+                        /*tex
+                            It happens a lot that the value is the same as the index, for instance
+                            with case mappings.
+
+                            Using mode 3 for the case where all values are the default value saves
+                            in \CONTEXT\ some 128 * 5 dumps, which is not worth the trouble but it
+                            is neat anyway.
+
+                            1 : values are kind of unique
+                            2 : for all values : value == self
+                            3 : for all values : value == default
+
+                            Actually, we could decide not to save at all in the third mode because
+                            unset equals default.
+                        */
+                        int mode = 1;
+                        if (bytes != 8) {
+                            /*tex Check for default values. */
+                            int slide = bytes == 1 ? LMT_SA_LOWPART/4 : (bytes == 2 ? LMT_SA_LOWPART/2 : LMT_SA_LOWPART);
+                            mode = 3;
+                            for (int l = 0; l < slide; l++) {
+                                if (a->tree[h][m][l].uint_value != a->dflt.uint_value) {
+                                    mode = 1;
+                                    break;
+                                }
+                            }
+                        }
+                        if (mode == 1 && bytes == 4) {
+                            /*tex Check for identity values. */
+                            unsigned int hm = h * LMT_SA_HIGHPART + m * LMT_SA_MIDPART * LMT_SA_LOWPART;
+                            mode = 2;
+                            for (int l = 0; l < LMT_SA_LOWPART; l++) {
+                                if (a->tree[h][m][l].uint_value == hm) {
+                                    hm++;
+                                } else {
+                                    mode = 1;
+                                    break;
+                                }
+                            }
+                        }
+                        dump_int(f, mode);
+                        if (mode == 1) {
+                            /*tex
+                                We have unique values. By avoiding this branch we save some 85 Kb
+                                on the \CONTEXT\ format. We could actually save this property in
+                                the tree but there is not that much to gain.
+                            */
+                            int slide = LMT_SA_LOWPART;
+                            switch (bytes) {
+                                case 1: slide =   LMT_SA_LOWPART/4; break;
+                                case 2: slide =   LMT_SA_LOWPART/2; break;
+                                case 4: slide =   LMT_SA_LOWPART  ; break;
+                                case 8: slide = 2*LMT_SA_LOWPART  ; break;
+                            }
+                            dump_items(f, &a->tree[h][m][0], sizeof(sa_tree_item), slide);
+                        } else {
+                            /*tex We have a self value or defaults. */
+                        }
+                    } else {
+                        dump_via_int(f, 0);
+                    }
+                }
+            } else {
+                dump_via_int(f, 0);
+            }
+        }
+    } else {
+        /*tex A marker: */
+        dump_via_int(f, 0);
+    }
+}
+
+sa_tree sa_undump_tree(dumpstream f)
+{
+    int x;
+    sa_tree a = (sa_tree) sa_malloc_array(sizeof(sa_tree_head), 1);
+    undump_int(f, a->sa_stack_step);
+    undump_int(f, a->dflt.int_value);
+    a->sa_stack_size = a->sa_stack_step;
+    a->stack = sa_calloc_array(sizeof(sa_stack_item), a->sa_stack_size);
+    a->sa_stack_ptr = 0;
+    a->tree = NULL;
+    /*tex The marker: */
+    undump_int(f, x);
+    if (x != 0) {
+        int bytes, mode;
+        a->tree = (sa_tree_item ***) sa_calloc_array(sizeof(void *), LMT_SA_HIGHPART);
+        undump_int(f, bytes);
+        a->bytes = bytes;
+        for (int h = 0; h < LMT_SA_HIGHPART; h++) {
+            undump_int(f, mode); /* more of a trigger */
+            if (mode > 0) {
+                a->tree[h] = (sa_tree_item **) sa_calloc_array(sizeof(void *), LMT_SA_MIDPART);
+                for (int m = 0; m < LMT_SA_MIDPART; m++) {
+                    undump_int(f, mode);
+                    switch (mode) {
+                        case 1:
+                            /*tex
+                                We have unique values.
+                            */
+                            {
+                                int slide = LMT_SA_LOWPART;
+                                switch (bytes) {
+                                    case 1: slide =   LMT_SA_LOWPART/4; break;
+                                    case 2: slide =   LMT_SA_LOWPART/2; break;
+                                    case 4: slide =   LMT_SA_LOWPART  ; break;
+                                    case 8: slide = 2*LMT_SA_LOWPART  ; break;
+                                }
+                                a->tree[h][m] = sa_malloc_array(sizeof(sa_tree_item), slide);
+                                undump_items(f, &a->tree[h][m][0], sizeof(sa_tree_item), slide);
+                            }
+                            break;
+                        case 2:
+                            /*tex
+                                We have a self value. We only have this when we have integers.
+                                Other cases are math anyway, so not much to gain.
+                            */
+                            {
+                                if (bytes == 4) {
+                                    int hm = h * 128 * LMT_SA_HIGHPART + m * LMT_SA_MIDPART;
+                                    a->tree[h][m] = sa_malloc_array(sizeof(sa_tree_item), LMT_SA_LOWPART);
+                                    for (int l = 0; l < LMT_SA_LOWPART; l++) {
+                                        a->tree[h][m][l].int_value = hm;
+                                        hm++;
+                                    }
+                                } else {
+                                    printf("\nfatal format error, mode %i, bytes %i\n", mode, bytes);
+                                }
+                            }
+                            break;
+                        case 3:
+                            /*tex
+                                We have all default values, so no need to set them. In fact, we
+                                cannot even end up here.
+                            */
+                            break;
+                        default:
+                            /*tex
+                                We have no values set.
+                            */
+                            break;
+                    }
+                }
+            }
+        }
+    }
+    return a;
+}
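
For readers new to this code: a character code is split into a high, a middle, and a low part (the LMT_SA_H_PART, LMT_SA_M_PART and LMT_SA_L_PART macros are defined elsewhere in the source tree), and each level of the three level tree is only allocated once something in its range gets set; a lookup that hits a missing branch falls back to the tree's default. The standalone sketch below mimics that shape with plain ints. It is illustrative only, not part of the commit: the 7+7+7 bit split and all names in it are assumptions, since the real part widths hide behind the macros.

```c
#include <stdio.h>
#include <stdlib.h>

#define HIGHPART 128                     /* assumed: 2^7 top level slots    */
#define MIDPART  128                     /* assumed: 2^7 middle level slots */
#define LOWPART  128                     /* assumed: 2^7 leaf slots         */

#define H_PART(n) (((n) >> 14) & 127)
#define M_PART(n) (((n) >>  7) & 127)
#define L_PART(n) ( (n)        & 127)

typedef struct demo_tree {
    int ***tree;                         /* levels are allocated lazily     */
    int    dflt;                         /* returned for codes never set    */
} demo_tree;

static int demo_get(const demo_tree *t, int n)
{
    /* Walk down only through branches that exist; a missing branch means
       nothing in that range was ever set, so the default applies. */
    if (t->tree && t->tree[H_PART(n)] && t->tree[H_PART(n)][M_PART(n)]) {
        return t->tree[H_PART(n)][M_PART(n)][L_PART(n)];
    }
    return t->dflt;
}

static void demo_set(demo_tree *t, int n, int v)
{
    int h = H_PART(n);
    int m = M_PART(n);
    /* Allocate each level on first use, as sa_set_item_4 does. */
    if (! t->tree) {
        t->tree = calloc(HIGHPART, sizeof(int **));
    }
    if (! t->tree[h]) {
        t->tree[h] = calloc(MIDPART, sizeof(int *));
    }
    if (! t->tree[h][m]) {
        t->tree[h][m] = malloc(LOWPART * sizeof(int));
        for (int i = 0; i < LOWPART; i++) {
            t->tree[h][m][i] = t->dflt; /* fresh leaves start at the default */
        }
    }
    t->tree[h][m][L_PART(n)] = v;
}

int main(void)
{
    demo_tree t = { NULL, -1 };
    demo_set(&t, 0x41, 10);              /* 'A' lands in tree[0][0]         */
    printf("%d %d\n", demo_get(&t, 0x41), demo_get(&t, 0x1F600)); /* 10 -1 */
    return 0;
}
```

The payoff of this shape is that an array covering the full range up to 0x10FFFF only materializes the leaves it touches, and the head comment's remark about an ASCII bias translates into most live data sitting together in tree[0][0].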
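The /4, %4 and /2, %2 arithmetic in the one and two byte accessors packs four bytes or two shorts into each 32 bit sa_tree_item, so a leaf only needs LMT_SA_LOWPART/4 or LMT_SA_LOWPART/2 items. A compact illustration, using an assumed stand-in for the item union (the real sa_tree_item is declared in the headers):

```c
#include <stdio.h>

/* Assumed shape for illustration: one 32 bit payload viewed as four
   bytes, two shorts, or one int, like the accessors above expect. */
typedef union item {
    unsigned char  uchar_value[4];
    unsigned short ushort_value[2];
    int            int_value;
    unsigned int   uint_value;
} item;

int main(void)
{
    item slots[64] = { 0 };
    int n = 70;                      /* some code point within this leaf  */
    /* sa_set_item_1 stores the byte for code n in item n/4, lane n%4: */
    slots[n/4].uchar_value[n%4] = 200;
    printf("%d\n", slots[n/4].uchar_value[n%4]);   /* 200 */
    /* With 2 byte values the same trick packs two per item (n/2, n%2): */
    slots[n/2].ushort_value[n%2] = 40000;
    printf("%d\n", slots[n/2].ushort_value[n%2]);  /* 40000 */
    return 0;
}
```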
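The embedded save stack is what gives these arrays TeX style grouping behaviour: an assignment at group level gl > 1 pushes the old value (sa_aux_store_stack), a global assignment at gl <= 1 negates the level of pending entries for that code (sa_aux_skip_in_stack) so a later restore skips them, and sa_restore_stack pops every entry whose absolute level is at least the level being left. A self contained model of that discipline for a single slot, again an illustration rather than the commit's API:

```c
#include <stdio.h>
#include <stdlib.h>

#define STACK_SIZE 64

typedef struct slot {
    int value;                   /* one "array entry"                      */
    int levels[STACK_SIZE];      /* saved group levels, negative = skip    */
    int saved[STACK_SIZE];       /* previous values to put back on restore */
    int ptr;                     /* top of the save stack                  */
} slot;

static void slot_set(slot *s, int v, int gl)
{
    if (gl <= 1) {
        /* Global assignment: flip pending saves negative so restores
           skip them (compare sa_aux_skip_in_stack). */
        for (int p = s->ptr; p > 0; p--) {
            if (s->levels[p] > 0) {
                s->levels[p] = -s->levels[p];
            }
        }
    } else {
        /* Local assignment: remember what to put back on restore
           (compare sa_aux_store_stack). */
        s->ptr++;
        s->levels[s->ptr] = gl;
        s->saved[s->ptr] = s->value;
    }
    s->value = v;
}

static void slot_restore(slot *s, int gl)
{
    /* Pop entries for groups at level gl and deeper; only positive
       levels are written back (compare sa_restore_stack). */
    while (s->ptr > 0 && abs(s->levels[s->ptr]) >= gl) {
        if (s->levels[s->ptr] > 0) {
            s->value = s->saved[s->ptr];
        }
        s->ptr--;
    }
}

int main(void)
{
    slot s = { .value = 1 };
    slot_set(&s, 2, 2);          /* local set inside a level 2 group   */
    slot_restore(&s, 2);         /* group ends: back to 1              */
    printf("%d\n", s.value);
    slot_set(&s, 3, 2);          /* local set inside another group     */
    slot_set(&s, 4, 1);          /* global set: pending save is voided */
    slot_restore(&s, 2);         /* group ends: the global 4 survives  */
    printf("%d\n", s.value);
    return 0;
}
```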
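Finally, the three dump modes in sa_dump_tree can be made concrete. Before writing a leaf chunk, the dumper classifies it: mode 3 when every slot still holds the default (nothing worth saving), mode 2 when every 4 byte slot equals its own code point (typical for case mappings), and mode 1 otherwise, the only mode whose payload actually goes through dump_items. A hypothetical standalone version of that classification, with the chunk's first code point passed in explicitly as base:

```c
#include <stdio.h>

#define LOWPART 128

/* Classify one leaf chunk the way sa_dump_tree does before writing it:
   3 = all defaults, 2 = identity (value == self), 1 = must be dumped. */
static int classify(const unsigned int *chunk, unsigned int base, unsigned int dflt)
{
    int mode = 3;
    for (int l = 0; l < LOWPART; l++) {
        if (chunk[l] != dflt) {
            mode = 1;
            break;
        }
    }
    if (mode == 1) {
        mode = 2;
        for (int l = 0; l < LOWPART; l++) {
            if (chunk[l] != base + (unsigned int) l) {
                mode = 1;
                break;
            }
        }
    }
    return mode;
}

int main(void)
{
    unsigned int chunk[LOWPART];
    for (unsigned int l = 0; l < LOWPART; l++) {
        chunk[l] = 0x200 + l;                  /* identity relative to base */
    }
    printf("%d\n", classify(chunk, 0x200, 0)); /* 2 */
    chunk[5] = 42;
    printf("%d\n", classify(chunk, 0x200, 0)); /* 1 */
    return 0;
}
```

On the undump side the same tags drive reconstruction: mode 1 reads the items back, mode 2 regenerates the identity mapping, and modes 3 and 0 leave the branch untouched, because unset entries already read as the default.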