/* See license.txt in the root of this project. */ # include "luametatex.h" /*tex We come now to what is probably the most interesting algorithm of \TEX: the mechanism for choosing the \quote {best possible} breakpoints that yield the individual lines of a paragraph. \TEX's line-breaking algorithm takes a given horizontal list and converts it to a sequence of boxes that are appended to the current vertical list. In the course of doing this, it creates a special data structure containing three kinds of records that are not used elsewhere in \TEX. Such nodes are created while a paragraph is being processed, and they are destroyed afterwards; thus, the other parts of \TEX\ do not need to know anything about how line-breaking is done. The method used here is based on an approach devised by Michael F. Plass and the author in 1977, subsequently generalized and improved by the same two people in 1980. A detailed discussion appears in {\sl SOFTWARE---Practice \AM\ Experience \bf11} (1981), 1119--1184, where it is shown that the line-breaking problem can be regarded as a special case of the problem of computing the shortest path in an acyclic network. The cited paper includes numerous examples and describes the history of line breaking as it has been practiced by printers through the ages. The present implementation adds two new ideas to the algorithm of 1980: Memory space requirements are considerably reduced by using smaller records for inactive nodes than for active ones, and arithmetic overflow is avoided by using \quote {delta distances} instead of keeping track of the total distance from the beginning of the paragraph to the current point. The |line_break| procedure should be invoked only in horizontal mode; it leaves that mode and places its output into the current vlist of the enclosing vertical mode (or internal vertical mode). There is one explicit parameter: |d| is true for partial paragraphs preceding display math mode; in this case the amount of additional penalty inserted before the final line is |display_widow_penalty| instead of |widow_penalty|. There are also a number of implicit parameters: The hlist to be broken starts at |node_next (head)|, and it is nonempty. The value of |prev_graf| in the enclosing semantic level tells where the paragraph should begin in the sequence of line numbers, in case hanging indentation or |\parshape| are in use; |prev_graf| is zero unless this paragraph is being continued after a displayed formula. Other implicit parameters, such as the |par_shape_ptr| and various penalties to use for hyphenation, etc., appear in |eqtb|. After |line_break| has acted, it will have updated the current vlist and the value of |prev_graf|. Furthermore, the global variable |just_box| will point to the final box created by |line_break|, so that the width of this line can be ascertained when it is necessary to decide whether to use |above_display_skip| or |above_display_short_skip| before a displayed formula. We have an additional parameter |\parfillleftskip| and below we cheat a bit. We add two glue nodes so that the par builder will work the same and doesn't need to be adapted, but when we're done we move the leftbound node to the beginning of the (last) line. Todo: change some variable names to more meaningful ones so that the code is easier to understand. (Remark for myself: the lua variant that i use for playing around occasionally is not in sync with the code here!) I played a bit with prerolling: make a copy, run the par builder, afterwards collect the result in a box that then can be consulted: wd, ht, dp, quality, hyphens, and especially shape fitting (which was the reason, because |\hangafter| assumes lines and esp with math a line is somewhat unpredictable so we get bad fitting). In the end we decided that it was kind of useless because of the unlikely usage scenario. But I might pick up on it. Of course it can be done in \LUA\ but we don't want the associated performance hit (management overhead) and dealing with (progressive) solution oscillating is also an issue. */ linebreak_state_info lmt_linebreak_state = { .just_box = 0, .last_line_fill = 0, .no_shrink_error_yet = 0, .second_pass = 0, .final_pass = 0, .threshold = 0, .adjust_spacing = 0, .adjust_spacing_step = 0, .adjust_spacing_shrink = 0, .adjust_spacing_stretch = 0, .max_stretch_ratio = 0, .max_shrink_ratio = 0, .current_font_step = 0, .passive = 0, .printed_node = 0, .pass_number = 0, .active_width = { 0 }, .background = { 0 }, .break_width = { 0 }, // .auto_breaking = 0, // .math_level = 0, .internal_penalty_interline = 0, .internal_penalty_broken = 0, .internal_left_box = null, .internal_left_box_width = 0, .init_internal_left_box = 0, .init_internal_left_box_width = 0, .internal_right_box = null, .internal_right_box_width = 0, .internal_middle_box = null, .disc_width = { 0 }, .minimal_demerits = { 0 }, .minimum_demerits = 0, .easy_line = 0, .last_special_line = 0, .first_width = 0, .second_width = 0, .first_indent = 0, .second_indent = 0, .best_bet = 0, .fewest_demerits = 0, .best_line = 0, .actual_looseness = 0, .line_difference = 0, .do_last_line_fit = 0, .fill_width = { 0 }, .dir_ptr = 0, .warned = 0, .calling_back = 0, }; /*tex We could use a bit larger array and glue_orders where normal starts at 0 so we then need a larger array. Let's not do that now. */ typedef enum fill_orders { fi_order = 0, fil_order = 1, fill_order = 2, filll_order = 3, } fill_orders; /*tex The |just_box| variable has the |hlist_node| for the last line of the new paragraph. In it's complete form, |line_break| is a rather lengthy procedure --- sort of a small world unto itself --- we must build it up little by little. Below you see only the general outline. The main task performed here is to move the list from |head| to |temp_head| and go into the enclosing semantic level. We also append the |\parfillskip| glue to the end of the paragraph, removing a space (or other glue node) if it was there, since spaces usually precede blank lines and instances of |$$|. The |par_fill_skip| is preceded by an infinite penalty, so it will never be considered as a potential breakpoint. */ void tex_line_break_prepare( halfword par, halfword *tail, halfword *parinit_left_skip_glue, halfword *parinit_right_skip_glue, halfword *parfill_left_skip_glue, halfword *parfill_right_skip_glue, halfword *final_penalty ) { /* too much testing of next */ if (node_type(par) == par_node) { if (tracing_linebreak_lists) { tex_begin_diagnostic(); tex_print_format("[linebreak: prepare, before]"); tex_show_box(par); tex_end_diagnostic(); } *tail = *tail ? *tail : tex_tail_of_node_list(par); *final_penalty = tex_new_penalty_node(infinite_penalty, line_penalty_subtype); *parfill_left_skip_glue = tex_new_glue_node(tex_get_par_par(par, par_par_fill_left_skip_code), par_fill_left_skip_glue); *parfill_right_skip_glue = tex_new_glue_node(tex_get_par_par(par, par_par_fill_right_skip_code), par_fill_right_skip_glue); *parinit_left_skip_glue = null; *parinit_right_skip_glue = null; if (par != *tail && node_type(*tail) == glue_node && ! tex_is_par_init_glue(*tail)) { halfword prev = node_prev(*tail); node_next(prev) = null; tex_flush_node(*tail); *tail = prev; } tex_attach_attribute_list_copy(*final_penalty, par); tex_attach_attribute_list_copy(*parfill_left_skip_glue, par); tex_attach_attribute_list_copy(*parfill_right_skip_glue, par); tex_try_couple_nodes(*tail, *final_penalty); tex_try_couple_nodes(*final_penalty, *parfill_left_skip_glue); tex_try_couple_nodes(*parfill_left_skip_glue, *parfill_right_skip_glue); *tail = *parfill_right_skip_glue; if (node_next(par)) { halfword p = par; halfword n = node_next(par); while (node_next(p) && node_type(node_next(p)) == dir_node) { p = node_next(p); } while (n) { if (node_type(n) == glue_node && node_subtype(n) == indent_skip_glue) { *parinit_left_skip_glue = tex_new_glue_node(tex_get_par_par(par, par_par_init_left_skip_code), par_init_left_skip_glue); *parinit_right_skip_glue = tex_new_glue_node(tex_get_par_par(par, par_par_init_right_skip_code), par_init_right_skip_glue); tex_attach_attribute_list_copy(*parinit_left_skip_glue, par); tex_attach_attribute_list_copy(*parinit_right_skip_glue, par); tex_try_couple_nodes(*parinit_right_skip_glue, n); tex_try_couple_nodes(*parinit_left_skip_glue, *parinit_right_skip_glue); // tex_try_couple_nodes(par, *parinit_left_skip_glue); tex_try_couple_nodes(p, *parinit_left_skip_glue); break; } else { n = node_next(n); /* sort of weird and tricky */ } } } if (tracing_linebreak_lists) { tex_begin_diagnostic(); tex_print_format("[linebreak: prepare, after]"); tex_show_box(par); tex_end_diagnostic(); } } } void tex_line_break(int d, int line_break_context) { halfword head = node_next(cur_list.head); /*tex There should be a local par node at the beginning! */ if (node_type(head) == par_node) { /*tex We need this for over- or underfull box messages. */ halfword tail = cur_list.tail; lmt_packaging_state.pack_begin_line = cur_list.mode_line; node_prev(head) = null; /*tex Hyphenate, driven by callback or fallback to normal \TEX. */ if (tex_list_has_glyph(head)) { tex_handle_hyphenation(head, tail); head = tex_handle_glyphrun(head, line_break_context, par_dir(head)); tail = tex_tail_of_node_list(head); tex_try_couple_nodes(cur_list.head, head); cur_list.tail = tail; } /*tex We remove (only one) trailing glue node, when present. */ // if (head != tail && node_type(tail) == glue_node && ! tex_is_par_init_glue(tail)) { // halfword prev = node_prev(tail); // node_next(prev) = null; // tex_flush_node(tail); // cur_list.tail = prev; // } node_next(temp_head) = head; /*tex There should be a local par node at the beginning! */ if (node_type(head) == par_node) { /*tex The tail thing is a bit weird here as it's not the tail. One day I will look into this. One complication is that we have the normal break routing or a callback that replaces it but that callback can call the normal routine itself with specific parameters set. */ halfword start_of_par; halfword par = head; halfword parinit_left_skip_glue = null; halfword parinit_right_skip_glue = null; halfword parfill_left_skip_glue = null; halfword parfill_right_skip_glue = null; halfword final_penalty = null; tex_line_break_prepare(par, &tail, &parinit_left_skip_glue, &parinit_right_skip_glue, &parfill_left_skip_glue, &parfill_right_skip_glue, &final_penalty); cur_list.tail = tail; /*tex We start with a prepared list. If you mess with that the linebreak routine might not work well especially if the pointers are messed up. So be it. */ lmt_node_filter_callback(pre_linebreak_filter_callback, line_break_context, temp_head, &(cur_list.tail)); /*tex We assume that the list is still okay. */ lmt_linebreak_state.last_line_fill = cur_list.tail; tex_pop_nest(); start_of_par = cur_list.tail; lmt_linebreak_state.calling_back = 1; if (lmt_linebreak_callback(d, temp_head, &(cur_list.tail))) { /*tex When we end up here we have a prepared list so we need to make sure that when the callback usaes that list with the built in break routine we don't do that twice. One should work on copies! Afterwards we need to find the correct value for the |just_box|. */ halfword box_search = cur_list.tail; lmt_linebreak_state.just_box = null; if (box_search) { do { if (node_type(box_search) == hlist_node) { lmt_linebreak_state.just_box = box_search; } box_search = node_next(box_search); } while (box_search); } if (! lmt_linebreak_state.just_box) { tex_handle_error( succumb_error_type, "Invalid linebreak_filter", "A linebreaking routine should return a non-empty list of nodes and at least one\n" "of those has to be a \\hbox. Sorry, I cannot recover from this." ); } } else { line_break_properties properties = { .initial_par = par, .display_math = d, .tracing_paragraphs = tracing_paragraphs_par, .paragraph_dir = par_dir(par), .parfill_left_skip = parfill_left_skip_glue, .parfill_right_skip = parfill_right_skip_glue, .parinit_left_skip = parinit_left_skip_glue, .parinit_right_skip = parinit_right_skip_glue, .pretolerance = tex_get_par_par(par, par_pre_tolerance_code), .tolerance = tex_get_par_par(par, par_tolerance_code), .emergency_stretch = tex_get_par_par(par, par_emergency_stretch_code), .looseness = tex_get_par_par(par, par_looseness_code), .adjust_spacing = tex_get_par_par(par, par_adjust_spacing_code), .protrude_chars = tex_get_par_par(par, par_protrude_chars_code), .adj_demerits = tex_get_par_par(par, par_adj_demerits_code), .line_penalty = tex_get_par_par(par, par_line_penalty_code), .last_line_fit = tex_get_par_par(par, par_last_line_fit_code), .double_hyphen_demerits = tex_get_par_par(par, par_double_hyphen_demerits_code), .final_hyphen_demerits = tex_get_par_par(par, par_final_hyphen_demerits_code), .hsize = tex_get_par_par(par, par_hsize_code), .left_skip = tex_get_par_par(par, par_left_skip_code), .right_skip = tex_get_par_par(par, par_right_skip_code), .hang_indent = tex_get_par_par(par, par_hang_indent_code), .hang_after = tex_get_par_par(par, par_hang_after_code), .par_shape = tex_get_par_par(par, par_par_shape_code), .inter_line_penalty = tex_get_par_par(par, par_inter_line_penalty_code), .inter_line_penalties = tex_get_par_par(par, par_inter_line_penalties_code), .club_penalty = tex_get_par_par(par, par_club_penalty_code), .club_penalties = tex_get_par_par(par, par_club_penalties_code), .widow_penalty = tex_get_par_par(par, par_widow_penalty_code), .widow_penalties = tex_get_par_par(par, par_widow_penalties_code), .display_widow_penalty = tex_get_par_par(par, par_display_widow_penalty_code), .display_widow_penalties = tex_get_par_par(par, par_display_widow_penalties_code), .orphan_penalty = tex_get_par_par(par, par_orphan_penalty_code), .orphan_penalties = tex_get_par_par(par, par_orphan_penalties_code), .broken_penalty = tex_get_par_par(par, par_broken_penalty_code), .baseline_skip = tex_get_par_par(par, par_baseline_skip_code), .line_skip = tex_get_par_par(par, par_line_skip_code), .line_skip_limit = tex_get_par_par(par, par_line_skip_limit_code), .adjust_spacing_step = tex_get_par_par(par, par_adjust_spacing_step_code), .adjust_spacing_shrink = tex_get_par_par(par, par_adjust_spacing_shrink_code), .adjust_spacing_stretch = tex_get_par_par(par, par_adjust_spacing_stretch_code), .hyphenation_mode = tex_get_par_par(par, par_hyphenation_mode_code), .shaping_penalties_mode = tex_get_par_par(par, par_shaping_penalties_mode_code), .shaping_penalty = tex_get_par_par(par, par_shaping_penalty_code), }; tex_do_line_break(&properties); /*tex We assume that the list is still okay when we do some post line break stuff. */ } lmt_linebreak_state.calling_back = 0; lmt_node_filter_callback(post_linebreak_filter_callback, line_break_context, start_of_par, &(cur_list.tail)); lmt_packaging_state.pack_begin_line = 0; return; } } tex_confusion("missing local par node"); } /*tex Glue nodes in a horizontal list that is being paragraphed are not supposed to include \quote {infinite} shrinkability; that is why the algorithm maintains four registers for stretching but only one for shrinking. If the user tries to introduce infinite shrinkability, the shrinkability will be reset to finite and an error message will be issued. A boolean variable |no_shrink_error_yet| prevents this error message from appearing more than once per paragraph. Beware, this does an in-place fix to the glue (which can be a register!). As we store glues a bit different we do a different fix here. */ static scaled tex_aux_checked_shrink(halfword p) { if (glue_shrink(p) && glue_shrink_order(p) != normal_glue_order) { if (lmt_linebreak_state.no_shrink_error_yet) { lmt_linebreak_state.no_shrink_error_yet = 0; tex_handle_error( normal_error_type, "Infinite glue shrinkage found in a paragraph", "The paragraph just ended includes some glue that has infinite shrinkability,\n" "e.g., '\\hskip 0pt minus 1fil'. Such glue doesn't belong there---it allows a\n" "paragraph of any length to fit on one line. But it's safe to proceed, since the\n" "offensive shrinkability has been made finite." ); } glue_shrink_order(p) = normal_glue_order; } return glue_shrink(p); } /*tex A pointer variable |cur_p| runs through the given horizontal list as we look for breakpoints. This variable is global, since it is used both by |line_break| and by its subprocedure |try_break|. Another global variable called |threshold| is used to determine the feasibility of individual lines: breakpoints are feasible if there is a way to reach them without creating lines whose badness exceeds |threshold|. (The badness is compared to |threshold| before penalties are added, so that penalty values do not affect the feasibility of breakpoints, except that no break is allowed when the penalty is 10000 or more.) If |threshold| is 10000 or more, all legal breaks are considered feasible, since the |badness| function specified above never returns a value greater than~10000. Up to three passes might be made through the paragraph in an attempt to find at least one set of feasible breakpoints. On the first pass, we have |threshold=pretolerance| and |second_pass = final_pass = false|. If this pass fails to find a feasible solution, |threshold| is set to |tolerance|, |second_pass| is set |true|, and an attempt is made to hyphenate as many words as possible. If that fails too, we add |emergency_stretch| to the background stretchability and set |final_pass = true|. |second_pass| is this our second attempt to break this paragraph and |final_path| our final attempt to break this paragraph while |threshold| is the maximum badness on feasible lines. The maximum fill level for |hlist_stack|. Maybe good if larger than |2 * max_quarterword|, so that box nesting level would overflow first. The stack for |find_protchar_left()| and |find_protchar_right()|; |hlist_stack_level| is the fill level for |hlist_stack| */ # define max_hlist_stack 512 /* We can optimize this when we have a global setting. */ static void tex_aux_warn_expand_pars(void) { if (! lmt_linebreak_state.warned) { tex_normal_warning("font expansion", "using fonts with different limit of expansion in one paragraph is not allowed"); lmt_linebreak_state.warned = 1; } } static int tex_aux_check_expand_pars(halfword adjust_spacing_step, halfword f) { if (adjust_spacing_step > 0) { return 1; } else if ((font_step(f) == 0) || ((font_max_stretch(f) == 0) && (font_max_shrink(f) == 0))) { return 0; } else if (lmt_linebreak_state.current_font_step < 0) { lmt_linebreak_state.current_font_step = font_step(f); } else if (lmt_linebreak_state.current_font_step != font_step(f)) { tex_normal_error("font expansion", "using fonts with different step of expansion in one paragraph is not allowed"); } { int m = font_max_stretch(f); if (m) { if (lmt_linebreak_state.max_stretch_ratio < 0) { lmt_linebreak_state.max_stretch_ratio = m; } else if (lmt_linebreak_state.max_stretch_ratio > m) { lmt_linebreak_state.max_stretch_ratio = m; tex_aux_warn_expand_pars(); } } } { int m = font_max_shrink(f); if (m) { if (lmt_linebreak_state.max_shrink_ratio < 0) { lmt_linebreak_state.max_shrink_ratio = -m; } else if (-lmt_linebreak_state.max_shrink_ratio > -m) { lmt_linebreak_state.max_shrink_ratio = -m; tex_aux_warn_expand_pars(); } } } return 1; } /*tex Search left to right from list head |l|, returns 1st non-skipable item: */ static halfword tex_aux_find_protchar_left(halfword l, int d) { int done = 0 ; halfword initial = l; while (node_next(l) && node_type(l) == hlist_node && tex_zero_box_dimensions(l) && ! box_list(l)) { /*tex For paragraph start with |\parindent = 0pt| or any empty hbox. */ l = node_next(l); done = 1 ; } if (! done && node_type(l) == par_node) { l = node_next(l); done = 1 ; } if (! done && d) { while (node_next(l) && ! (node_type(l) == glyph_node || non_discardable(l))) { /*tex standard discardables at line break, \TEX book, p 95 */ l = node_next(l); } } if (node_type(l) != glyph_node) { halfword t; int run = 1; halfword hlist_stack[max_hlist_stack]; int hlist_stack_level = 0; do { t = l; while (run && node_type(l) == hlist_node && box_list(l)) { if (hlist_stack_level >= max_hlist_stack) { /* return tex_normal_error("push_node", "stack overflow"); */ return initial; } else { hlist_stack[hlist_stack_level++] = l; } l = box_list(l); } while (run && tex_protrusion_skipable(l)) { while (! node_next(l) && hlist_stack_level > 0) { /*tex Don't visit this node again. */ if (hlist_stack_level <= 0) { /*tex This can point to some bug. */ /* return tex_normal_error("pop_node", "stack underflow (internal error)"); */ return initial; } else { l = hlist_stack[--hlist_stack_level]; } run = 0; } if (node_next(l) && node_type(l) == boundary_node && node_subtype(l) == protrusion_boundary && (boundary_data(l) == 1 || boundary_data(l) == 3)) { /*tex Skip next node. */ l = node_next(l); } if (node_next(l)) { l = node_next(l); } else if (hlist_stack_level == 0) { run = 0; } } } while (t != l); } return l; } /*tex Search right to left from list tail |r| to head |l|, returns 1st non-skipable item. */ static halfword tex_aux_find_protchar_right(halfword l, halfword r) { if (r) { halfword t; int run = 1; halfword initial = r; halfword hlist_stack[max_hlist_stack]; int hlist_stack_level = 0; do { t = r; while (run && node_type(r) == hlist_node && box_list(r)) { if (hlist_stack_level >= max_hlist_stack) { /* tex_normal_error("push_node", "stack overflow"); */ return initial; } else { hlist_stack[hlist_stack_level++] = l; } if (hlist_stack_level >= max_hlist_stack) { /* tex_normal_error("push_node", "stack overflow"); */ return initial; } else { hlist_stack[hlist_stack_level++] = r; } l = box_list(r); r = l; while (node_next(r)) { halfword s = r; r = node_next(r); node_prev(r) = s; } } while (run && tex_protrusion_skipable(r)) { while (r == l && hlist_stack_level > 0) { /*tex Don't visit this node again. */ if (hlist_stack_level <= 0) { /*tex This can point to some bug. */ /* return tex_normal_error("pop_node", "stack underflow (internal error)"); */ return initial; } else { r = hlist_stack[--hlist_stack_level]; } if (hlist_stack_level <= 0) { /*tex This can point to some bug. */ /* return tex_normal_error("pop_node", "stack underflow (internal error)"); */ return initial; } else { l = hlist_stack[--hlist_stack_level]; } } if ((r != l) && r) { if (node_prev(r) && node_type(r) == boundary_node && node_subtype(r) == protrusion_boundary && (boundary_data(r) == 2 || boundary_data(r) == 3)) { /*tex Skip next node. */ r = node_prev(r); } if (node_prev(r)) { r = node_prev(r); } else { /*tex This is the input: |\leavevmode \penalty -10000 \penalty -10000| */ run = 0; } } else if (r == l && hlist_stack_level == 0) { run = 0; } } } while (t != r); } return r; } /*tex The algorithm essentially determines the best possible way to achieve each feasible combination of position, line, and fitness. Thus, it answers questions like, \quotation {What is the best way to break the opening part of the paragraph so that the fourth line is a tight line ending at such-and-such a place?} However, the fact that all lines are to be the same length after a certain point makes it possible to regard all sufficiently large line numbers as equivalent, when the looseness parameter is zero, and this makes it possible for the algorithm to save space and time. An \quote {active node} and a \quote {passive node} are created in |mem| for each feasible breakpoint that needs to be considered. Active nodes are three words long and passive nodes are two words long. We need active nodes only for breakpoints near the place in the paragraph that is currently being examined, so they are recycled within a comparatively short time after they are created. An active node for a given breakpoint contains six fields: \startitemize[n] \startitem |vlink| points to the next node in the list of active nodes; the last active node has |vlink=active|. \stopitem \startitem |break_node| points to the passive node associated with this breakpoint. \stopitem \startitem |line_number| is the number of the line that follows this breakpoint. \stopitem \startitem |fitness| is the fitness classification of the line ending at this breakpoint. \stopitem \startitem |type| is either |hyphenated_node| or |unhyphenated_node|, depending on whether this breakpoint is a |disc_node|. \stopitem \startitem |total_demerits| is the minimum possible sum of demerits over all lines leading from the beginning of the paragraph to this breakpoint. \stopitem \stopitemize The value of |node_next(active)| points to the first active node on a vlinked list of all currently active nodes. This list is in order by |line_number|, except that nodes with |line_number > easy_line| may be in any order relative to each other. */ void tex_initialize_active(void) { node_type(active_head) = hyphenated_node; active_line_number(active_head) = max_halfword; /*tex The |subtype| is actually the |fitness|. It is set with |new_node| to one of the fitness values. */ active_fitness(active_head) = very_loose_fit; } /*tex The passive node for a given breakpoint contains eight fields: \startitemize \startitem |vlink| points to the passive node created just before this one, if any, otherwise it is |null|. \stopitem \startitem |cur_break| points to the position of this breakpoint in the horizontal list for the paragraph being broken. \stopitem \startitem |prev_break| points to the passive node that should precede this one in an optimal path to this breakpoint. \stopitem \startitem |serial| is equal to |n| if this passive node is the |n|th one created during the current pass. (This field is used only when printing out detailed statistics about the line-breaking calculations.) \stopitem \startitem |passive_pen_inter| holds the current |localinterlinepenalty| \stopitem \startitem |passive_pen_broken| holds the current |localbrokenpenalty| \stopitem \stopitemize There is a global variable called |passive| that points to the most recently created passive node. Another global variable, |printed_node|, is used to help print out the paragraph when detailed information about the line-breaking computation is being displayed. The most recent node on passive list, the most recent node that has been printed, and the number of passive nodes allocated on this pass, is registered in the passive field. The active list also contains \quote {delta} nodes that help the algorithm compute the badness of individual lines. Such nodes appear only between two active nodes, and they have |type = delta_node|. If |p| and |r| are active nodes and if |q| is a delta node between them, so that |vlink (p) = q| and |vlink (q) = r|, then |q| tells the space difference between lines in the horizontal list that start after breakpoint |p| and lines that start after breakpoint |r|. In other words, if we know the length of the line that starts after |p| and ends at our current position, then the corresponding length of the line that starts after |r| is obtained by adding the amounts in node~|q|. A delta node contains seven scaled numbers, since it must record the net change in glue stretchability with respect to all orders of infinity. The natural width difference appears in |mem[q+1].sc|; the stretch differences in units of pt, sfi, fil, fill, and filll appear in |mem[q + 2 .. q + 6].sc|; and the shrink difference appears in |mem[q + 7].sc|. The |subtype| field of a delta node is not used. {\em NB: Actually, we have more fields now.} As the algorithm runs, it maintains a set of seven delta-like registers for the length of the line following the first active breakpoint to the current position in the given hlist. When it makes a pass through the active list, it also maintains a similar set of seven registers for the length following the active breakpoint of current interest. A third set holds the length of an empty line (namely, the sum of |\leftskip| and |\rightskip|); and a fourth set is used to create new delta nodes. When we pass a delta node we want to do operations like: \starttyping for k := 1 to 7 do cur_active_width[k] := cur_active_width[k] + mem[q+k].sc|}; \stoptyping and we want to do this without the overhead of |for| loops so we use update macros. |active_width| is he distance from first active node to~|cur_p|, |background| the length of an \quote {empty} line, and |break_width| the length being computed after current break. We make |auto_breaking| accessible out of |line_break|. Let's state the principles of the delta nodes more precisely and concisely, so that the following programs will be less obscure. For each legal breakpoint~|p| in the paragraph, we define two quantities $\alpha(p)$ and $\beta(p)$ such that the length of material in a line from breakpoint~|p| to breakpoint~|q| is $\gamma+\beta(q)-\alpha(p)$, for some fixed $\gamma$. Intuitively, $\alpha(p)$ and $\beta(q)$ are the total length of material from the beginning of the paragraph to a point after a break at |p| and to a point before a break at |q|; and $\gamma$ is the width of an empty line, namely the length contributed by |\leftskip| and |\rightskip|. Suppose, for example, that the paragraph consists entirely of alternating boxes and glue skips; let the boxes have widths $x_1\ldots x_n$ and let the skips have widths $y_1\ldots y_n$, so that the paragraph can be represented by $x_1y_1\ldots x_ny_n$. Let $p_i$ be the legal breakpoint at $y_i$; then $\alpha(p_i) = x_1 + y_1 + \cdots + x_i + y_i$, and $\beta (p_i) = x_1 + y_1 + \cdots + x_i$. To check this, note that the length of material from $p_2$ to $p_5$, say, is $\gamma + x_3 + y_3 + x_4 + y_4 + x_5 = \gamma + \beta (p_5) - \alpha (p_2)$. The quantities $\alpha$, $\beta$, $\gamma$ involve glue stretchability and shrinkability as well as a natural width. If we were to compute $\alpha(p)$ and $\beta(p)$ for each |p|, we would need multiple precision arithmetic, and the multiprecise numbers would have to be kept in the active nodes. \TeX\ avoids this problem by working entirely with relative differences or \quote {deltas}. Suppose, for example, that the active list contains $a_1\,\delta_1\,a_2\, \delta_2\,a_3$, where the |a|'s are active breakpoints and the $\delta$'s are delta nodes. Then $\delta_1 = \alpha(a_1) - \alpha(a_2)$ and $\delta_2 = \alpha(a_2) - \alpha(a_3)$. If the line breaking algorithm is currently positioned at some other breakpoint |p|, the |active_width| array contains the value $\gamma +\beta(p) - \alpha(a_1)$. If we are scanning through the list of active nodes and considering a tentative line that runs from $a_2$ to~|p|, say, the |cur_active_width| array will contain the value $\gamma + \beta(p) - \alpha(a_2)$. Thus, when we move from $a_2$ to $a_3$, we want to add $\alpha(a_2) - \alpha(a_3)$ to |cur_active_width|; and this is just $\delta_2$, which appears in the active list between $a_2$ and $a_3$. The |background| array contains $\gamma$. The |break_width| array will be used to calculate values of new delta nodes when the active list is being updated. The heart of the line-breaking procedure is |try_break|, a subroutine that tests if the current breakpoint |cur_p| is feasible, by running through the active list to see what lines of text can be made from active nodes to~|cur_p|. If feasible breaks are possible, new break nodes are created. If |cur_p| is too far from an active node, that node is deactivated. The parameter |pi| to |try_break| is the penalty associated with a break at |cur_p|; we have |pi = eject_penalty| if the break is forced, and |pi=inf_penalty| if the break is illegal. The other parameter, |break_type|, is set to |hyphenated_node| or |unhyphenated_node|, depending on whether or not the current break is at a |disc_node|. The end of a paragraph is also regarded as |hyphenated_node|; this case is distinguishable by the condition |cur_p = null|. \startlines |internal_pen_inter|: running |\localinterlinepenalty| |internal_pen_broken|: running |\localbrokenpenalty| |internal_left_box|: running |\localleftbox| |internal_left_box_width|: running |\localleftbox| |init_internal_left_box|: running |\localleftbox| |init_internal_left_box_width|: running |\localleftbox| width |internal_right_box|: running |\localrightbox| |internal_right_box_width|: running |\localrightbox| width |disc_width|: the length of discretionary material preceding a break \stoplines As we consider various ways to end a line at |cur_p|, in a given line number class, we keep track of the best total demerits known, in an array with one entry for each of the fitness classifications. For example, |minimal_demerits [tight_fit]| contains the fewest total demerits of feasible line breaks ending at |cur_p| with a |tight_fit| line; |best_place [tight_fit]| points to the passive node for the break before |cur_p| that achieves such an optimum; and |best_pl_line[tight_fit]| is the |line_number| field in the active node corresponding to |best_place [tight_fit]|. When no feasible break sequence is known, the |minimal_demerits| entries will be equal to |awful_bad|, which is $2^{30}-1$. Another variable, |minimum_demerits|, keeps track of the smallest value in the |minimal_demerits| array. The length of lines depends on whether the user has specified |\parshape| or |\hangindent|. If |par_shape_ptr| is not null, it points to a $(2n+1)$-word record in |mem|, where the |vinfo| in the first word contains the value of |n|, and the other $2n$ words contain the left margins and line lengths for the first |n| lines of the paragraph; the specifications for line |n| apply to all subsequent lines. If |par_shape_ptr = null|, the shape of the paragraph depends on the value of |n = hang_after|; if |n >= 0|, hanging indentation takes place on lines |n + 1|, |n + 2|, \dots, otherwise it takes place on lines 1, \dots, $\vert n\vert$. When hanging indentation is active, the left margin is |hang_indent|, if |hang_indent >= 0|, else it is 0; the line length is $|hsize|-\vert|hang_indent|\vert$. The normal setting is |par_shape_ptr = null|, |hang_after = 1|, and |hang_indent = 0|. Note that if |hang_indent = 0|, the value of |hang_after| is irrelevant. Some more variables and remarks: line numbers |> easy_line| are equivalent in break nodes line numbers |> last_special_line| all have the same width |first_width| is the width of all lines |<= last_special_line|, if no |\parshape| has been specified |second_width| is the width of all lines |> last_special_line| |first_indent| is the left margin to go with |first_width| |second_indent| s the left margin to go with |second_width| |best_bet| indicated the passive node and its predecessors |fewest_demerits| are the demerits associated with |best_bet| |best_line| is the line number following the last line of the new paragraph |actual_looseness| is the difference between |line_number (best_bet)| and the optimum |best_line| |line_diff| is the difference between the current line number and the optimum |best_line| \TEX\ makes use of the fact that |hlist_node|, |vlist_node|, |rule_node|, |insert_node|, |mark_node|, |adjust_node|, |disc_node|, |whatsit_node|, and |math_node| are at the low end of the type codes, by permitting a break at glue in a list if and only if the |type| of the previous node is less than |math_node|. Furthermore, a node is discarded after a break if its type is |math_node| or~more. */ static halfword tex_aux_clean_up_the_memory(halfword p) { halfword q = node_next(active_head); while (q != active_head) { p = node_next(q); // tex_free_node(q, get_node_size(node_type(q))); // less overhead & testing tex_flush_node(q); q = p; } q = lmt_linebreak_state.passive; while (q) { p = node_next(q); // tex_free_node(q, get_node_size(node_type(q))); // less overhead & testing tex_flush_node(q); q = p; } return p; } /*tex Instead of macros we use inline functions. Nowadays compilers generate code that is quite similar as when we use macros (and sometimes even better). */ inline static void tex_aux_add_disc_source_to_target(halfword adjust_spacing, scaled target[], const scaled source[]) { target[total_glue_amount] += source[total_glue_amount]; if (adjust_spacing) { target[font_stretch_amount] += source[font_stretch_amount]; target[font_shrink_amount] += source[font_shrink_amount]; } } inline static void tex_aux_sub_disc_target_from_source(halfword adjust_spacing, scaled target[], const scaled source[]) { target[total_glue_amount] -= source[total_glue_amount]; if (adjust_spacing) { target[font_stretch_amount] -= source[font_stretch_amount]; target[font_shrink_amount] -= source[font_shrink_amount]; } } inline static void tex_aux_reset_disc_target(halfword adjust_spacing, scaled *target) { target[total_glue_amount] = 0; if (adjust_spacing) { target[font_stretch_amount] = 0; target[font_shrink_amount] = 0; } } /* A memcopy for the whole array is probably more efficient. */ inline static void tex_aux_set_target_to_source(halfword adjust_spacing, scaled target[], const scaled source[]) { // memcpy(&target[total_glue_amount], &source[total_glue_amount], font_shrink_amount * sizeof(halfword)); for (int i = total_glue_amount; i <= total_shrink_amount; i++) { target[i] = source[i]; } if (adjust_spacing) { target[font_stretch_amount] = source[font_stretch_amount]; target[font_shrink_amount] = source[font_shrink_amount]; } } /* These delta nodes use an offset and as a result we waste half of the memory words. So, by not using an offset but just named fields, we can save 4 memory words (32 bytes) per delta node. So, instead of this: \starttyping inline void add_to_target_from_delta(halfword adjust_spacing, scaled *target, halfword delta) { for (int i = total_glue_amount; i <= total_shrink_amount; i++) { target[i] += delta_field(delta, i); } if (adjust_spacing) { target[font_stretch_amount] += delta_field(delta, font_stretch_amount); target[font_shrink_amount] += delta_field(delta, font_shrink_amount); } } \stoptyping We use the more verbose variants and let the compiler optimize the lot. */ inline static void tex_aux_add_to_target_from_delta(halfword adjust_spacing, scaled target[], halfword delta) { target[total_glue_amount] += delta_field_total_glue(delta); target[total_stretch_amount] += delta_field_total_stretch(delta); target[total_fi_amount] += delta_field_total_fi_amount(delta); target[total_fil_amount] += delta_field_total_fil_amount(delta); target[total_fill_amount] += delta_field_total_fill_amount(delta); target[total_filll_amount] += delta_field_total_filll_amount(delta); target[total_shrink_amount] += delta_field_total_shrink(delta); if (adjust_spacing) { target[font_stretch_amount] += delta_field_font_stretch(delta); target[font_shrink_amount] += delta_field_font_shrink(delta); } } inline static void tex_aux_sub_delta_from_target(halfword adjust_spacing, scaled target[], halfword delta) { target[total_glue_amount] -= delta_field_total_glue(delta); target[total_stretch_amount] -= delta_field_total_stretch(delta); target[total_fi_amount] -= delta_field_total_fi_amount(delta); target[total_fil_amount] -= delta_field_total_fil_amount(delta); target[total_fill_amount] -= delta_field_total_fill_amount(delta); target[total_filll_amount] -= delta_field_total_filll_amount(delta); target[total_shrink_amount] -= delta_field_total_shrink(delta); if (adjust_spacing) { target[font_stretch_amount] -= delta_field_font_stretch(delta); target[font_shrink_amount] -= delta_field_font_shrink(delta); } } inline static void tex_aux_add_to_delta_from_delta(halfword adjust_spacing, halfword target, halfword source) { delta_field_total_glue(target) += delta_field_total_glue(source); delta_field_total_stretch(target) += delta_field_total_stretch(source); delta_field_total_fi_amount(target) += delta_field_total_fi_amount(source); delta_field_total_fil_amount(target) += delta_field_total_fil_amount(source); delta_field_total_fill_amount(target) += delta_field_total_fill_amount(source); delta_field_total_filll_amount(target) += delta_field_total_filll_amount(source); delta_field_total_shrink(target) += delta_field_total_shrink(source); if (adjust_spacing) { delta_field_font_stretch(target) += delta_field_font_stretch(source); delta_field_font_shrink(target) += delta_field_font_shrink(source); } } inline static void tex_aux_set_delta_from_difference(halfword adjust_spacing, halfword delta, const scaled source_1[], const scaled source_2[]) { delta_field_total_glue(delta) = (source_1[total_glue_amount] - source_2[total_glue_amount]); delta_field_total_stretch(delta) = (source_1[total_stretch_amount] - source_2[total_stretch_amount]); delta_field_total_fi_amount(delta) = (source_1[total_fi_amount] - source_2[total_fi_amount]); delta_field_total_fil_amount(delta) = (source_1[total_fil_amount] - source_2[total_fil_amount]); delta_field_total_fill_amount(delta) = (source_1[total_fill_amount] - source_2[total_fill_amount]); delta_field_total_filll_amount(delta) = (source_1[total_filll_amount] - source_2[total_filll_amount]); delta_field_total_shrink(delta) = (source_1[total_shrink_amount] - source_2[total_shrink_amount]); if (adjust_spacing) { delta_field_font_stretch(delta) = (source_1[font_stretch_amount] - source_2[font_stretch_amount]); delta_field_font_shrink(delta) = (source_1[font_shrink_amount] - source_2[font_shrink_amount]); } } inline static void tex_aux_add_delta_from_difference(halfword adjust_spacing, halfword delta, const scaled source_1[], const scaled source_2[]) { delta_field_total_glue(delta) += (source_1[total_glue_amount] - source_2[total_glue_amount]); delta_field_total_stretch(delta) += (source_1[total_stretch_amount] - source_2[total_stretch_amount]); delta_field_total_fi_amount(delta) += (source_1[total_fi_amount] - source_2[total_fi_amount]); delta_field_total_fil_amount(delta) += (source_1[total_fil_amount] - source_2[total_fil_amount]); delta_field_total_fill_amount(delta) += (source_1[total_fill_amount] - source_2[total_fill_amount]); delta_field_total_filll_amount(delta) += (source_1[total_filll_amount] - source_2[total_filll_amount]); delta_field_total_shrink(delta) += (source_1[total_shrink_amount] - source_2[total_shrink_amount]); if (adjust_spacing) { delta_field_font_stretch(delta) += (source_1[font_stretch_amount] - source_2[font_stretch_amount]); delta_field_font_shrink(delta) += (source_1[font_shrink_amount] - source_2[font_shrink_amount]); } } /*tex This function is used to add the width of a list of nodes (from a discretionary) to one of the width arrays. Replacement texts and discretionary texts are supposed to contain only character nodes, kern nodes, and box or rule nodes. From now on we just ignore \quite {invalid} nodes. If any such node influences the width, so be it. \starttyping static void bad_node_in_disc_error(halfword p) { tex_formatted_error( "linebreak", "invalid node with type %s found in discretionary", node_data[node_type(p)].name ); } \stoptyping */ static void tex_aux_add_to_widths(halfword s, int adjust_spacing, int adjust_spacing_step, scaled widths[]) { /* todo only check_expand_pars once per font (or don't check) */ while (s) { switch (node_type(s)) { case glyph_node: widths[total_glue_amount] += tex_glyph_width_ex(s); // ex if (adjust_spacing && ! tex_has_glyph_option(s, glyph_option_no_expansion) && tex_aux_check_expand_pars(adjust_spacing_step, glyph_font(s))) { lmt_packaging_state.previous_char_ptr = s; widths[font_stretch_amount] += tex_char_stretch(s); widths[font_shrink_amount] += tex_char_shrink(s); }; break; case hlist_node: case vlist_node: widths[total_glue_amount] += box_width(s); break; case rule_node: widths[total_glue_amount] += rule_width(s); break; case glue_node: widths[total_glue_amount] += glue_amount(s); widths[total_stretch_amount + glue_stretch_order(s)] += glue_stretch(s); widths[total_shrink_amount] += glue_shrink(s); break; case kern_node: widths[total_glue_amount] += kern_amount(s); if (adjust_spacing == adjust_spacing_full && node_subtype(s) == font_kern_subtype) { halfword n = node_prev(s); if (n && node_type(n) == glyph_node && ! tex_has_glyph_option(node_next(s), glyph_option_no_expansion)) { widths[font_stretch_amount] += tex_kern_stretch(s); widths[font_shrink_amount] += tex_kern_shrink(s); } } break; case disc_node: break; default: /* bad_node_in_disc_error(s); */ break; } s = node_next(s); } } /*tex This function is used to substract the width of a list of nodes (from a discretionary) from one of the width arrays. It is used only once, but deserves it own function because of orthogonality with the |add_to_widths| function. */ static void tex_aux_sub_from_widths(halfword s, int adjust_spacing, int adjust_spacing_step, scaled widths[]) { while (s) { /*tex Subtract the width of node |s| from |break_width|; */ switch (node_type(s)) { case glyph_node: widths[total_glue_amount] -= tex_glyph_width_ex(s); // ex if (adjust_spacing && ! tex_has_glyph_option(s, glyph_option_no_expansion) && tex_aux_check_expand_pars(adjust_spacing_step, glyph_font(s))) { lmt_packaging_state.previous_char_ptr = s; widths[font_stretch_amount] -= tex_char_stretch(s); widths[font_shrink_amount] -= tex_char_shrink(s); } break; case hlist_node: case vlist_node: widths[total_glue_amount] -= box_width(s); break; case rule_node: widths[total_glue_amount] -= rule_width(s); break; case glue_node: widths[total_glue_amount] -= glue_amount(s); widths[total_stretch_amount + glue_stretch_order(s)] -= glue_stretch(s); widths[total_shrink_amount] -= glue_shrink(s); break; case kern_node: widths[total_glue_amount] -= kern_amount(s); if (adjust_spacing == adjust_spacing_full && node_subtype(s) == font_kern_subtype) { halfword n = node_prev(s); if (n && node_type(n) == glyph_node && ! tex_has_glyph_option(node_next(s), glyph_option_no_expansion)) { widths[font_stretch_amount] -= tex_kern_stretch(s); widths[font_shrink_amount] -= tex_kern_shrink(s); } } break; case disc_node: break; default: /* bad_node_in_disc_error(s); */ break; } s = node_next(s); } } /*tex When we insert a new active node for a break at |cur_p|, suppose this new node is to be placed just before active node |a|; then we essentially want to insert $\delta\,|cur_p|\,\delta ^ \prime$ before |a|, where $\delta = \alpha (a) - \alpha (|cur_p|)$ and $\delta ^ \prime = \alpha (|cur_p|) - \alpha (a)$ in the notation explained above. The |cur_active_width| array now holds $\gamma + \beta (|cur_p|) - \alpha (a)$; so $\delta$ can be obtained by subtracting |cur_active_width| from the quantity $\gamma + \beta (|cur_p|) - \alpha (|cur_p|)$. The latter quantity can be regarded as the length of a line from |cur_p| to |cur_p|; we call it the |break_width| at |cur_p|. The |break_width| is usually negative, since it consists of the background (which is normally zero) minus the width of nodes following~|cur_p| that are eliminated after a break. If, for example, node |cur_p| is a glue node, the width of this glue is subtracted from the background; and we also look ahead to eliminate all subsequent glue and penalty and kern and math nodes, subtracting their widths as well. Kern nodes do not disappear at a line break unless they are |explicit|. */ static void tex_aux_compute_break_width(int break_type, int adjust_spacing, int adjust_spacing_step, halfword p) { /*tex Glue and other whitespace to be skipped after a break; used if unhyphenated, or |post_break = null|. */ halfword s = p; if (p) { switch (break_type) { case hyphenated_node: case delta_node: case passive_node: /*tex Compute the discretionary |break_width| values. When |p| is a discretionary break, the length of a line \quotation {from |p| to |p|} has to be defined properly so that the other calculations work out. Suppose that the pre-break text at |p| has length $l_0$, the post-break text has length $l_1$, and the replacement text has length |l|. Suppose also that |q| is the node following the replacement text. Then length of a line from |p| to |q| will be computed as $\gamma + \beta (q) - \alpha (|p|)$, where $\beta (q) = \beta (|p|) - l_0 + l$. The actual length will be the background plus $l_1$, so the length from |p| to |p| should be $\gamma + l_0 + l_1 - l$. If the post-break text of the discretionary is empty, a break may also discard~|q|; in that unusual case we subtract the length of~|q| and any other nodes that will be discarded after the discretionary break. The value of $l_0$ need not be computed, since |line_break| will put it into the global variable |disc_width| before calling |try_break|. In case of nested discretionaries, we always follow the no-break path, as we are talking about the breaking on {\it this} position. */ tex_aux_sub_from_widths(disc_no_break_head(p), adjust_spacing, adjust_spacing_step, lmt_linebreak_state.break_width); tex_aux_add_to_widths(disc_post_break_head(p), adjust_spacing, adjust_spacing_step, lmt_linebreak_state.break_width); tex_aux_add_disc_source_to_target(adjust_spacing, lmt_linebreak_state.break_width, lmt_linebreak_state.disc_width); if (disc_post_break_head(p)) { s = null; } else { /*tex no |post_break|: skip any whitespace following */ s = node_next(p); } break; } } while (s) { switch (node_type(s)) { case glue_node: /*tex Subtract glue from |break_width|; */ lmt_linebreak_state.break_width[total_glue_amount] -= glue_amount(s); lmt_linebreak_state.break_width[total_stretch_amount + glue_stretch_order(s)] -= glue_stretch(s); lmt_linebreak_state.break_width[total_shrink_amount] -= glue_shrink(s); break; case penalty_node: break; case kern_node: if (node_subtype(s) != explicit_kern_subtype && node_subtype(s) != italic_kern_subtype) { return; } else { lmt_linebreak_state.break_width[total_glue_amount] -= kern_amount(s); break; } case math_node: if (tex_math_glue_is_zero(s)) { lmt_linebreak_state.break_width[total_glue_amount] -= math_surround(s); } else { lmt_linebreak_state.break_width[total_glue_amount] -= math_amount(s); lmt_linebreak_state.break_width[total_stretch_amount + math_stretch_order(s)] -= math_stretch(s); lmt_linebreak_state.break_width[total_shrink_amount] -= math_shrink(s); } break; default: return; }; s = node_next(s); } } static void tex_aux_initialize_show_break_node(int callback_id) { lmt_run_callback(lmt_lua_state.lua_instance, callback_id, "d->", initialize_show_breaks_context); } static void tex_aux_start_show_break_node(int callback_id, int pass) { lmt_run_callback(lmt_lua_state.lua_instance, callback_id, "dd->", start_show_breaks_context, pass); } static void tex_aux_stop_show_break_node(int callback_id) { lmt_run_callback(lmt_lua_state.lua_instance, callback_id, "d->", stop_show_breaks_context); } static void tex_aux_collect_show_break_node(int callback_id) { lmt_run_callback(lmt_lua_state.lua_instance, callback_id, "d->", collect_show_breaks_context); } static void tex_aux_line_show_break_node(int callback_id) { lmt_run_callback(lmt_lua_state.lua_instance, callback_id, "dNdddd->", line_show_breaks_context, lmt_linebreak_state.just_box, lmt_packaging_state.last_badness, lmt_packaging_state.last_overshoot, lmt_packaging_state.total_shrink[normal_glue_order], lmt_packaging_state.total_stretch[normal_glue_order] ); } static void tex_aux_delete_break_node(halfword active, halfword passive, int callback_id) { (void) active; lmt_run_callback(lmt_lua_state.lua_instance, callback_id, "dd->", delete_show_breaks_context, passive_serial(passive) ); } static void tex_aux_wrapup_show_break_node(int callback_id) { lmt_run_callback(lmt_lua_state.lua_instance, callback_id, "d->", wrapup_show_breaks_context); } static void tex_aux_show_break_node(halfword active, halfword passive, int callback_id, int pass, halfword *demerits) { lmt_run_callback(lmt_lua_state.lua_instance, callback_id, "ddddddddNdd->r", report_show_breaks_context, pass, passive_serial(passive), passive_prev_break(passive) ? passive_serial(passive_prev_break(passive)) : 0, active_line_number(active) - 1, node_type(active), active_fitness(active), active_total_demerits(active), /* demerits */ passive_cur_break(passive), lmt_linebreak_state.do_last_line_fit ? active_short(active) : 0, lmt_linebreak_state.do_last_line_fit ? active_glue(active) : 0, demerits /* optionally changed */ ); } static void tex_aux_list_break_node(halfword passive, int callback_id) { lmt_run_callback(lmt_lua_state.lua_instance, callback_id, "dd->", list_show_breaks_context, passive_serial(passive) ); } static void tex_aux_print_break_node(halfword active, halfword passive) { /*tex Print a symbolic description of the new break node. */ tex_print_format( "%l[break: serial %i, line %i.%i,%s demerits %i, ", passive_serial(passive), active_line_number(active) - 1, active_fitness(active), node_type(active) == hyphenated_node ? " hyphenated, " : "", active_total_demerits(active) ); if (lmt_linebreak_state.do_last_line_fit) { /*tex Print additional data in the new active node. */ tex_print_format( " short %D, %s %D, ", active_short(active), pt_unit, passive_cur_break(passive) ? "glue" : "active", active_glue(active), pt_unit ); } tex_print_format( "previous %i]", passive_prev_break(passive) ? passive_serial(passive_prev_break(passive)) : null ); } static const char *tex_aux_node_name(halfword cur_p) { if (cur_p) { /*tex This could be more generic helper. */ switch (node_type(cur_p)) { case penalty_node : return "penalty"; case disc_node : return "discretionary"; case kern_node : return "kern"; case glue_node : return "glue"; /* in traditional tex "" */ default : return "math"; } } else { return "par"; } } static void tex_aux_print_feasible_break(halfword cur_p, halfword r, halfword b, int pi, int d, int artificial_demerits, const line_break_properties *properties) { (void) properties; /*tex Print a symbolic description of this feasible break. */ if (lmt_linebreak_state.printed_node != cur_p) { /*tex Print the list between |printed_node| and |cur_p|, then set |printed_node := cur_p|. */ tex_print_nlp(); if (cur_p) { halfword save_link = node_next(cur_p); node_next(cur_p) = null; tex_short_display(node_next(lmt_linebreak_state.printed_node)); node_next(cur_p) = save_link; } else { tex_short_display(node_next(lmt_linebreak_state.printed_node)); } lmt_linebreak_state.printed_node = cur_p; } tex_print_format( "%l[break: feasible, trigger %s, serial %i, badness %B, penalty %i, demerits %B]", tex_aux_node_name(cur_p), active_break_node(r) ? passive_serial(active_break_node(r)) : 0, b, pi, artificial_demerits ? awful_bad : d ); } /*tex We implement this one later on. */ /* The only reason why we still have line_break_dir is because we have some experimental protrusion trickery depending on it. */ static void tex_aux_post_line_break(const line_break_properties *properties, halfword line_break_dir, int callback_id); /*tex The next subroutine is used to compute the badness of glue, when a total |t| is supposed to be made from amounts that sum to~|s|. According to {\em The \TEX book}, the badness of this situation is $100(t/s)^3$; however, badness is simply a heuristic, so we need not squeeze out the last drop of accuracy when computing it. All we really want is an approximation that has similar properties. The actual method used to compute the badness is easier to read from the program than to describe in words. It produces an integer value that is a reasonably close approximation to $100(t/s)^3$, and all implementations of \TEX\ should use precisely this method. Any badness of $2^{13}$ or more is treated as infinitely bad, and represented by 10000. It is not difficult to prove that |badness (t + 1, s) >= badness (t, s) >= badness (t, s + 1)| The badness function defined here is capable of computing at most 1095 distinct values, but that is plenty. A core aspect of the linebreak algorithm is the calculation of badness. The formula currently used has evolved with the tex versions before Don Knuth settled on this approach. And I (HH) admit that I see no real reason to change something here. The only possible extension could be changing the hardcoded |loose_criterium| of 99 and |decent_criterium| of 12. These could become parameters instead. When looking at the code you will notice a loop that runs from |very_loose_fit| to |tight_fit| with the following four steps: \starttyping very_loose_fit loose_fit decent_fit tight_fit \stoptyping where we have only |loose_fit| and |decent_fit| with associated criteria later on. So, as an experiment I decided to add two steps in between. \starttyping very_loose_fit semi_loose_fit loose_fit decent_fit semi_tight_fit tight_fit \stoptyping Watch how we keep the assymetrical nature of this sequence: there is basicaly one tight step less than loose steps. Adding these steps took hardly any code so it was a cheap experiment. However, the result is not that spectacular: I'm pretty sure that users will not be able to choose consistently what result looks better, but who knows. For the moment I keep it, if only to be able to discuss it as useless extension. Configuring the value s is done with |\linebreakcriterium| which gets split into 4 parts (2 bytes per criterium). It is probably hard to explain to users what a different setting does and although one can force different output in narrow raggedright text it would probbably enough to just make the |decent_criterium| configureable. Anyway, because we're talking heuristics and pretty good estimates from Don Knuth here, it would be pretentious to suggest that I really did research this fuzzy topic (if it was worth the effort at all). Here |large_width_excess| is 110.32996pt while |small_stretchability| equals 25.38295pt. */ /*tex Around 2023-05-24 Mikael Sundqvist and I did numerous tests with the badness function below in comparison with the variant mentioned in Digital Typography (DEK) and we observed that indeed both functions behave pretty close (emulations with lua, mathematica etc). In practice one can get different badness values (especially low numbers). We ran some test on documents and on hundreds of pages one can get a few different decisions. The main reason for looking into this was that we were exploring a bit more visual approach to deciding on what penalties to use in the math inter-atom spacing in \CONTEXT\ (where we use a more granular class model). In the end the magic criteria became even more magic (and impressive). BTW, indeed we could get these 1095 different badness cases with as maximum calculated one 8189. */ halfword tex_badness(scaled t, scaled s) { /*tex Approximation to $\alpha t/s$, where $\alpha^3 \approx 100 \cdot 2^{18}$ */ if (t == 0) { return 0; } else if (s <= 0) { return infinite_bad; } else { /*tex $297^3 = 99.94 \times 2^{18}$ */ if (t <= large_width_excess) { t = (t * 297) / s; /* clipping by integer division */ } else if (s >= small_stretchability) { t = t / (s / 297); /* clipping by integer division */ } else { /*tex When we end up here |t| is pretty large so we can as well save a test and return immediately. (HH & MS: we tested this while cheating a bit because this function is seldom entered with values that make us end up here.) */ return infinite_bad; } if (t > 1290) { /*tex As $1290^3 < 2^{31} < 1291^3$ we catch an overflow here. */ /* actually badness 8189 */ return infinite_bad; } else { /*tex 297*297*297 == 26198073 / 100 => 261981 */ /*tex This is $t^3 / 2^{18}$, rounded to the nearest integer */ return (t * t * t + 0400000) / 01000000; /* 0400000/01000000 == 1/2 */ // return (t * t * t + 0x20000) / 0x40000; // return (t * t * t + 131072) / 262144; } } } inline static void tex_split_line_break_criterium(halfword criterium, halfword *semi_tight, halfword *decent, halfword *semi_loose, halfword *loose) { *semi_tight = (criterium >> 24) & 0x7F; *decent = (criterium >> 16) & 0x7F; *semi_loose = (criterium >> 8) & 0x7F; *loose = criterium & 0x7F; if (! *semi_tight) { *semi_tight = semi_tight_criterium; } if (! *decent) { *decent = decent_criterium; } if (! *semi_loose) { *semi_loose = semi_loose_criterium; } if (! *loose) { *loose = loose_criterium; } } inline static halfword tex_normalized_loose_badness(halfword b, halfword loose, halfword semi_loose, halfword decent) { if (b > loose) { return very_loose_fit; } else if (b > semi_loose) { return semi_loose_fit; } else if (b > decent) { return loose_fit; } else { return decent_fit; } } inline static halfword tex_normalized_tight_badness(halfword b, halfword decent, halfword semi_tight) { if (b > semi_tight) { return semi_tight_fit; } else if (b > decent) { return tight_fit; } else { return decent_fit; } } static void tex_check_protrusion_shortfall(halfword r, halfword first_p, halfword cur_p, halfword *shortfall) { // if (line_break_dir == dir_righttoleft) { // /*tex Not now, we need to keep more track. */ // } else { halfword o = null; halfword l = active_break_node(r) ? passive_cur_break(active_break_node(r)) : first_p; if (cur_p) { o = node_prev(cur_p); if (node_next(o) != cur_p) { tex_normal_error("linebreak", "the node list is messed up"); } } /*tex The last characters (hyphenation character) if these two list should always be the same anyway, so we just look at |pre_break|. Let's look at the right margin first. */ if (cur_p && node_type(cur_p) == disc_node && disc_pre_break_head(cur_p)) { /*tex A |disc_node| with non-empty |pre_break|, protrude the last char of |pre_break|: */ o = disc_pre_break_tail(cur_p); } else { o = tex_aux_find_protchar_right(l, o); } if (o && node_type(o) == glyph_node) { shortfall += tex_char_protrusion(o, right_margin_kern_subtype); // char_pw_kern(o, right_margin_kern, &margin_kern_stretch, &margin_kern_shrink); } /*tex now the left margin */ if (l && (node_type(l) == disc_node) && (disc_post_break_head(l))) { /*tex The first char could be a disc! Protrude the first char. */ o = disc_post_break_head(l); } else { o = tex_aux_find_protchar_left(l, 1); } if (o && node_type(o) == glyph_node) { shortfall += tex_char_protrusion(o, left_margin_kern_subtype); // char_pw_kern(o, left_margin_kern, &margin_kern_stretch, &margin_kern_shrink); } // } } static void tex_aux_try_break( const line_break_properties *properties, halfword penalty, halfword break_type, halfword first_p, halfword cur_p, int callback_id, int pass ) { /*tex stays a step behind |r| */ halfword prev_r = active_head; /*tex a step behind |prev_r|, if |type(prev_r) = delta_node| */ halfword prev_prev_r = null; /*tex distance from current active node */ scaled cur_active_width[n_of_glue_amounts] = { 0 }; /*tex These status arrays are global to the main loop and will be initialized as we go. */ halfword best_place[n_of_finess_values]; halfword best_place_line[n_of_finess_values]; scaled best_place_short[n_of_finess_values]; scaled best_place_glue[n_of_finess_values]; /* These are more local but we keep them here because of readability. */ /*tex badness of test line */ halfword badness = 0; /*tex demerits of test line */ int demerits = 0; /*tex glue stretch or shrink of test line, adjustment for last line */ scaled glue = 0; /*tex used in badness calculations */ scaled shortfall = 0; /*tex maximum line number in current equivalence class of lines */ halfword old_line = 0; /*tex have we found a feasible break at |cur_p|? */ int no_break_yet = 1; /*tex should node |r| remain in the active list? */ int node_r_stays_active; /*tex possible fitness class of test line */ halfword fit_class; /*tex has |d| been forced to zero? */ int artificial_demerits; /*tex the current line will be justified to this width */ scaled line_width = 0; /*tex line number of current active node */ halfword line = 0; /*tex We have added an extra category, just as experiment. In practice there is very little to gain here as it becomes kind of fuzzy and DEK values are quite okay. */ halfword semi_tight, decent, semi_loose, loose; /*tex in par node */ tex_split_line_break_criterium(line_break_criterium_par, &semi_tight, &decent, &semi_loose, &loose); /*tex Make sure that |pi| is in the proper range; */ if (penalty >= infinite_penalty) { /*tex this breakpoint is inhibited by infinite penalty */ return; } else if (penalty <= -infinite_penalty) { /*tex this breakpoint will be forced */ penalty = eject_penalty; } tex_aux_set_target_to_source(properties->adjust_spacing, cur_active_width, lmt_linebreak_state.active_width); while (1) { /*tex Here |r| runs through the active list: */ halfword r = node_next(prev_r); /*tex If node |r| is of type |delta_node|, update |cur_active_width|, set |prev_r| and |prev_prev_r|, then |goto continue|. The following code uses the fact that |type (active) <> delta_node|. */ if (node_type(r) == delta_node) { tex_aux_add_to_target_from_delta(properties->adjust_spacing, cur_active_width, r); prev_prev_r = prev_r; prev_r = r; continue; } else { /*tex We have an |unhyphenated_node| or |hyphenated_node|. */ } /*tex If a line number class has ended, create new active nodes for the best feasible breaks in that class; then |return| if |r = active|, otherwise compute the new |line_width|. The first part of the following code is part of \TEX's inner loop, so we don't want to waste any time. The current active node, namely node |r|, contains the line number that will be considered next. At the end of the list we have arranged the data structure so that |r = active| and |line_number (active) > old_l|. */ line = active_line_number(r); if (line > old_line) { /*tex Now we are no longer in the inner loop (well ...). */ if ((lmt_linebreak_state.minimum_demerits < awful_bad) && ((old_line != lmt_linebreak_state.easy_line) || (r == active_head))) { /*tex Create new active nodes for the best feasible breaks just found. It is not necessary to create new active nodes having |minimal_demerits| greater than |linebreak_state.minimum_demerits + abs (adj_demerits)|, since such active nodes will never be chosen in the final paragraph breaks. This observation allows us to omit a substantial number of feasible breakpoints from further consideration. */ if (no_break_yet) { no_break_yet = 0; tex_aux_set_target_to_source(properties->adjust_spacing, lmt_linebreak_state.break_width, lmt_linebreak_state.background); tex_aux_compute_break_width(break_type, properties->adjust_spacing, properties->adjust_spacing_step, cur_p); } /*tex Insert a delta node to prepare for breaks at |cur_p|. We use the fact that |type (active) <> delta_node|. */ if (node_type(prev_r) == delta_node) { /*tex modify an existing delta node */ tex_aux_add_delta_from_difference(properties->adjust_spacing, prev_r, lmt_linebreak_state.break_width, cur_active_width); } else if (prev_r == active_head) { /*tex no delta node needed at the beginning */ tex_aux_set_target_to_source(properties->adjust_spacing, lmt_linebreak_state.active_width, lmt_linebreak_state.break_width); } else { halfword q = tex_new_node(delta_node, (quarterword) very_loose_fit); node_next(q) = r; tex_aux_set_delta_from_difference(properties->adjust_spacing, q, lmt_linebreak_state.break_width, cur_active_width); node_next(prev_r) = q; prev_prev_r = prev_r; prev_r = q; } if (abs(properties->adj_demerits) >= awful_bad - lmt_linebreak_state.minimum_demerits) { lmt_linebreak_state.minimum_demerits = awful_bad - 1; } else { lmt_linebreak_state.minimum_demerits += abs(properties->adj_demerits); } for (halfword fit_class = very_loose_fit; fit_class <= tight_fit; fit_class++) { if (lmt_linebreak_state.minimal_demerits[fit_class] <= lmt_linebreak_state.minimum_demerits) { /*tex Insert a new active node from |best_place [fit_class]| to |cur_p|. When we create an active node, we also create the corresponding passive node. In the passive node we also keep track of the subparagraph penalties. */ halfword passive = tex_new_node(passive_node, (quarterword) very_loose_fit); halfword active = tex_new_node((quarterword) break_type, (quarterword) fit_class); halfword prev_break = best_place[fit_class]; /*tex Initialize the passive node: */ passive_cur_break(passive) = cur_p; passive_serial(passive) = ++lmt_linebreak_state.pass_number; passive_prev_break(passive) = prev_break; passive_pen_inter(passive) = lmt_linebreak_state.internal_penalty_interline; passive_pen_broken(passive) = lmt_linebreak_state.internal_penalty_broken; passive_last_left_box(passive) = lmt_linebreak_state.internal_left_box; passive_last_left_box_width(passive) = lmt_linebreak_state.internal_left_box_width; if (prev_break) { passive_left_box(passive) = passive_last_left_box(prev_break); passive_left_box_width(passive) = passive_last_left_box_width(prev_break); } else { passive_left_box(passive) = lmt_linebreak_state.init_internal_left_box; passive_left_box_width(passive) = lmt_linebreak_state.init_internal_left_box_width; } passive_right_box(passive) = lmt_linebreak_state.internal_right_box; passive_right_box_width(passive) = lmt_linebreak_state.internal_right_box_width; passive_middle_box(passive) = lmt_linebreak_state.internal_middle_box; /*tex Initialize the active node: */ active_break_node(active) = passive; active_line_number(active) = best_place_line[fit_class] + 1; active_total_demerits(active) = lmt_linebreak_state.minimal_demerits[fit_class]; // active_reserved(active) = lmt_linebreak_state.pass_number; if (lmt_linebreak_state.do_last_line_fit) { /*tex Store additional data in the new active node. */ active_short(active) = best_place_short[fit_class]; active_glue(active) = best_place_glue[fit_class]; } /*tex Append the passive node. */ node_next(passive) = lmt_linebreak_state.passive; lmt_linebreak_state.passive = passive; /*tex Append the active node. */ node_next(active) = r; node_next(prev_r) = active; prev_r = active; /* */ if (callback_id) { halfword demerits = active_total_demerits(active); tex_aux_show_break_node(active, passive, callback_id, pass, &demerits); active_total_demerits(active) = demerits; } if (properties->tracing_paragraphs > 0) { tex_aux_print_break_node(active, passive); } } lmt_linebreak_state.minimal_demerits[fit_class] = awful_bad; } lmt_linebreak_state.minimum_demerits = awful_bad; /*tex Insert a delta node to prepare for the next active node. When the following code is performed, we will have just inserted at least one active node before |r|, so |type (prev_r) <> delta_node|. */ if (r != active_head) { halfword delta = tex_new_node(delta_node, (quarterword) very_loose_fit); node_next(delta) = r; tex_aux_set_delta_from_difference(properties->adjust_spacing, delta, cur_active_width, lmt_linebreak_state.break_width); node_next(prev_r) = delta; prev_prev_r = prev_r; prev_r = delta; } } /*tex Quit on an active node, otherwise compute the new line width. When we come to the following code, we have just encountered the first active node~|r| whose |line_number| field contains |l|. Thus we want to compute the length of the $l\mskip1mu$th line of the current paragraph. Furthermore, we want to set |old_l| to the last number in the class of line numbers equivalent to~|l|. */ if (r == active_head) { return; } else if (line > lmt_linebreak_state.easy_line) { old_line = max_halfword - 1; line_width = lmt_linebreak_state.second_width; } else { old_line = line; /* if (properties->par_shape && specification_repeat(properties->par_shape)) { line_width = get_specification_width(properties->par_shape, l); } else */ if (line > lmt_linebreak_state.last_special_line) { line_width = lmt_linebreak_state.second_width; } else if (properties->par_shape) { line_width = tex_get_specification_width(properties->par_shape, line); } else { line_width = lmt_linebreak_state.first_width; } } } /*tex If a line number class has ended, create new active nodes for the best feasible breaks in that class; then |return| if |r = active|, otherwise compute the new |line_width|. Consider the demerits for a line from |r| to |cur_p|; deactivate node |r| if it should no longer be active; then |goto continue| if a line from |r| to |cur_p| is infeasible, otherwise record a new feasible break. */ artificial_demerits = 0; shortfall = line_width - cur_active_width[total_glue_amount]; if (active_break_node(r)) { shortfall -= passive_last_left_box_width(active_break_node(r)); } else { shortfall -= lmt_linebreak_state.init_internal_left_box_width; } shortfall -= lmt_linebreak_state.internal_right_box_width; // halfword margin_kern_stretch = 0; // halfword margin_kern_shrink = 0; if (properties->protrude_chars) { tex_check_protrusion_shortfall(r, first_p, cur_p, &shortfall); } /*tex The only reason why we have a shared ratio is that we need to calculate the shortfall for a line with mixed fonts. BTW, why do we divide by 2? */ if (shortfall == 0) { /*tex We're okay. */ } else if (shortfall > 0) { halfword total_stretch = cur_active_width[font_stretch_amount]; // halfword total_stretch = cur_active_width[font_stretch_amount] + margin_kern_stretch; if (total_stretch > 0) { if (total_stretch > shortfall) { shortfall = (total_stretch / (lmt_linebreak_state.max_stretch_ratio / lmt_linebreak_state.current_font_step)) / 2; } else { shortfall -= total_stretch; } } } else if (shortfall < 0) { halfword total_shrink = cur_active_width[font_shrink_amount]; // halfword total_shrink = cur_active_width[font_shrink_amount] + margin_kern_shrink; if (total_shrink > 0) { if (total_shrink > -shortfall) { shortfall = - (total_shrink / (lmt_linebreak_state.max_shrink_ratio / lmt_linebreak_state.current_font_step)) / 2; } else { shortfall += total_shrink; } } } if (shortfall > 0) { /*tex Set the value of |b| to the badness for stretching the line, and compute the corresponding |fit_class|. When a line must stretch, the available stretchability can be found in the subarray |cur_active_width [2 .. 6]|, in units of points, sfi, fil, fill and filll. The present section is part of \TEX's inner loop, and it is most often performed when the badness is infinite; therefore it is worth while to make a quick test for large width excess and small stretchability, before calling the |badness| subroutine. */ if (cur_active_width[total_fi_amount] || cur_active_width[total_fil_amount] || cur_active_width[total_fill_amount] || cur_active_width[total_filll_amount]) { if (lmt_linebreak_state.do_last_line_fit) { if (! cur_p) { /*tex The last line of a paragraph. Perform computations for last line and |goto found|. Here we compute the adjustment |g| and badness |b| for a line from |r| to the end of the paragraph. When any of the criteria for adjustment is violated we fall through to the normal algorithm. The last line must be too short, and have infinite stretch entirely due to |par_fill_skip|. */ if (active_short(r) == 0 || active_glue(r) <= 0) { /*tex Previous line was neither stretched nor shrunk, or was infinitely bad. */ goto NOT_FOUND; } if (cur_active_width[total_fi_amount] != lmt_linebreak_state.fill_width[fi_order] || cur_active_width[total_fil_amount] != lmt_linebreak_state.fill_width[fil_order] || cur_active_width[total_fill_amount] != lmt_linebreak_state.fill_width[fill_order] || cur_active_width[total_filll_amount] != lmt_linebreak_state.fill_width[filll_order]) { /*tex Infinite stretch of this line not entirely due to |par_fill_skip|. */ goto NOT_FOUND; } if (active_short(r) > 0) { glue = cur_active_width[total_stretch_amount]; } else { glue = cur_active_width[total_shrink_amount]; } if (glue <= 0) { /*tex No finite stretch resp.\ no shrink. */ goto NOT_FOUND; } lmt_scanner_state.arithmic_error = 0; glue = tex_fract(glue, active_short(r), active_glue(r), max_dimen); if (properties->last_line_fit < 1000) { glue = tex_fract(glue, properties->last_line_fit, 1000, max_dimen); } if (lmt_scanner_state.arithmic_error) { glue = (active_short(r) > 0) ? max_dimen : -max_dimen; } if (glue > 0) { /*tex Set the value of |b| to the badness of the last line for stretching, compute the corresponding |fit_class, and |goto found|. These badness computations are rather similar to those of the standard algorithm, with the adjustment amount |g| replacing the |shortfall|. */ if (glue > shortfall) { glue = shortfall; } if (glue > large_width_excess && (cur_active_width[total_stretch_amount] < small_stretchability)) { badness = infinite_bad; fit_class = very_loose_fit; } else { badness = tex_badness(glue, cur_active_width[total_stretch_amount]); fit_class = tex_normalized_loose_badness(badness, loose, semi_loose, decent); } goto FOUND; } else if (glue < 0) { /*tex Set the value of |b| to the badness of the last line for shrinking, compute the corresponding |fit_class, and |goto found||. */ if (-glue > cur_active_width[total_shrink_amount]) { glue = -cur_active_width[total_shrink_amount]; } badness = tex_badness(-glue, cur_active_width[total_shrink_amount]); fit_class = tex_normalized_tight_badness(badness, decent, semi_tight); goto FOUND; } } NOT_FOUND: shortfall = 0; } badness = 0; /*tex Infinite stretch. */ fit_class = decent_fit; } else if (shortfall > large_width_excess && cur_active_width[total_stretch_amount] < small_stretchability) { badness = infinite_bad; fit_class = very_loose_fit; } else { badness = tex_badness(shortfall, cur_active_width[total_stretch_amount]); fit_class = tex_normalized_loose_badness(badness, loose, semi_loose, decent); } } else { /*tex Set the value of |b| to the badness for shrinking the line, and compute the corresponding |fit_class|. Shrinkability is never infinite in a paragraph; we can shrink the line from |r| to |cur_p| by at most |cur_active_width [total_shrink_amount]|. */ if (-shortfall > cur_active_width[total_shrink_amount]) { badness = infinite_bad + 1; } else { badness = tex_badness(-shortfall, cur_active_width[total_shrink_amount]); } fit_class = tex_normalized_tight_badness(badness, decent, semi_tight); } if (lmt_linebreak_state.do_last_line_fit) { /*tex Adjust the additional data for last line; */ if (! cur_p) { shortfall = 0; glue = 0; } else if (shortfall > 0) { glue = cur_active_width[total_stretch_amount]; } else if (shortfall < 0) { glue = cur_active_width[total_shrink_amount]; } else { glue = 0; } } FOUND: if ((badness > infinite_bad) || (penalty == eject_penalty)) { /*tex Prepare to deactivate node~|r|, and |goto deactivate| unless there is a reason to consider lines of text from |r| to |cur_p|. During the final pass, we dare not lose all active nodes, lest we lose touch with the line breaks already found. The code shown here makes sure that such a catastrophe does not happen, by permitting overfull boxes as a last resort. This particular part of \TEX\ was a source of several subtle bugs before the correct program logic was finally discovered; readers who seek to improve \TEX\ should therefore think thrice before daring to make any changes here. */ if (lmt_linebreak_state.final_pass && (lmt_linebreak_state.minimum_demerits == awful_bad) && (node_next(r) == active_head) && (prev_r == active_head)) { /*tex Set demerits zero, this break is forced. */ artificial_demerits = 1; } else if (badness > lmt_linebreak_state.threshold) { goto DEACTIVATE; } node_r_stays_active = 0; } else { prev_r = r; if (badness > lmt_linebreak_state.threshold) { continue; } else { node_r_stays_active = 1; } } /*tex Record a new feasible break. When we get to this part of the code, the line from |r| to |cur_p| is feasible, its badness is~|b|, and its fitness classification is |fit_class|. We don't want to make an active node for this break yet, but we will compute the total demerits and record them in the |minimal_demerits| array, if such a break is the current champion among all ways to get to |cur_p| in a given line-number class and fitness class. */ if (artificial_demerits) { demerits = 0; } else { /*tex Compute the demerits, |d|, from |r| to |cur_p|. */ demerits = properties->line_penalty + badness; if (abs(demerits) >= 10000) { demerits = 100000000; } else { demerits = demerits * demerits; } if (penalty != 0) { if (penalty > 0) { demerits += (penalty * penalty); } else if (penalty > eject_penalty) { demerits -= (penalty * penalty); } } if (break_type == hyphenated_node && node_type(r) == hyphenated_node) { if (cur_p) { demerits += properties->double_hyphen_demerits; } else { demerits += properties->final_hyphen_demerits; } } /*tex Here |fitness| is just the subtype, so we could have put the cast in the macro instead: |# define fitness (n) ((halfword) (subtype (n))|. We need to cast because some compilers (versions or whatever) get confused by the type of (unsigned) integer used. */ if (abs(fit_class - (halfword) active_fitness(r)) > 1) { demerits = demerits + properties->adj_demerits; } } if (properties->tracing_paragraphs > 0) { tex_aux_print_feasible_break(cur_p, r, badness, penalty, demerits, artificial_demerits, properties); } /*tex This is the minimum total demerits from the beginning to |cur_p| via |r|. */ demerits += active_total_demerits(r); if (demerits <= lmt_linebreak_state.minimal_demerits[fit_class]) { lmt_linebreak_state.minimal_demerits[fit_class] = demerits; best_place[fit_class] = active_break_node(r); best_place_line[fit_class] = line; if (lmt_linebreak_state.do_last_line_fit) { /*tex Store additional data for this feasible break. For each feasible break we record the shortfall and glue stretch or shrink (or adjustment). */ best_place_short[fit_class] = shortfall; best_place_glue[fit_class] = glue; } if (demerits < lmt_linebreak_state.minimum_demerits) { lmt_linebreak_state.minimum_demerits = demerits; } } /*tex Record a new feasible break. */ if (node_r_stays_active) { /*tex |prev_r| has been set to |r|. */ continue; } DEACTIVATE: /*tex Deactivate node |r|. When an active node disappears, we must delete an adjacent delta node if the active node was at the beginning or the end of the active list, or if it was surrounded by delta nodes. We also must preserve the property that |cur_active_width| represents the length of material from |vlink (prev_r)| to~|cur_p|. */ node_next(prev_r) = node_next(r); if (callback_id) { tex_aux_delete_break_node(r, active_break_node(r), callback_id); } tex_flush_node(r); if (prev_r == active_head) { /*tex Update the active widths, since the first active node has been deleted. The following code uses the fact that |type (active) <> delta_node|. If the active list has just become empty, we do not need to update the |active_width| array, since it will be initialized when an active node is next inserted. */ r = node_next(active_head); if (node_type(r) == delta_node) { tex_aux_add_to_target_from_delta(properties->adjust_spacing, lmt_linebreak_state.active_width, r); tex_aux_set_target_to_source(properties->adjust_spacing, cur_active_width, lmt_linebreak_state.active_width); node_next(active_head) = node_next(r); tex_flush_node(r); } } else if (node_type(prev_r) == delta_node) { r = node_next(prev_r); if (r == active_head) { tex_aux_sub_delta_from_target(properties->adjust_spacing, cur_active_width, prev_r); node_next(prev_prev_r) = active_head; tex_flush_node(prev_r); prev_r = prev_prev_r; } else if (node_type(r) == delta_node) { tex_aux_add_to_target_from_delta(properties->adjust_spacing, cur_active_width, r); tex_aux_add_to_delta_from_delta(properties->adjust_spacing, prev_r, r); node_next(prev_r) = node_next(r); tex_flush_node(r); } } } } static halfword tex_aux_inject_orphan_penalty(halfword current, halfword amount) { halfword previous = node_prev(current); if (previous && node_type(previous) != penalty_node) { halfword penalty = tex_new_penalty_node(amount, orphan_penalty_subtype); tex_couple_nodes(previous, penalty); tex_couple_nodes(penalty, current); return previous; } else { return current; } } inline static int tex_aux_valid_glue_break(halfword p) { halfword prv = node_prev(p); return (prv && prv != temp_head && (node_type(prv) == glyph_node || precedes_break(prv) || precedes_kern(prv) || precedes_dir(prv))); } inline static halfword tex_aux_upcoming_penalty(halfword p) { halfword n = node_next(p); return (n && node_type(n) == math_node && node_subtype(n) == begin_inline_math) ? math_penalty(n) : 0; } /*tex I played a bit with a height driven hanging indentation. One can store |cur_p| in the active node and progressively calculate the height + depth and then act on that but in the end interline space, adjustsm etc. also have to be taken into account and that all happens later so in the end it makes no sense. There are valdi reasons why \TEX\ can't do some things reliable: user demands are unpredictable. */ /*tex Here we pickup the line number from |prev_graf| which relates to display math inside a paragraph. A display formula is then considered to span three lines. Of course this also assume a constant baseline distance with lines heigths not exceeding that amount. It also assumes that the shape and hang are not reset. We check the prevgraf for a large value because when we're close to |max_integer| we can wrap around due to addition beyond that and negative values has side effects (see musings-sideffects) but it's optional so that we can actually use these side effects. */ # define max_prev_graf (max_integer/2) void tex_do_line_break(line_break_properties *properties) { /*tex Miscellaneous nodes of temporary interest. */ int line_break_dir = properties->paragraph_dir; int callback_id = lmt_callback_defined(show_break_callback); int force_check_hyphenation = hyphenation_permitted(properties->hyphenation_mode, force_check_hyphenation_mode); (void) (properties->inter_line_penalties); /* avoid not used message */ /*tex Fix a buglet that probably is a feature. */ if ((cur_list.prev_graf > max_prev_graf || cur_list.prev_graf < 0) && normalize_par_mode_permitted(normalize_par_mode_par, limit_prev_graf_mode)) { tex_formatted_warning("tex", "clipping prev_graf %i to %i", cur_list.prev_graf, max_prev_graf); cur_list.prev_graf = max_prev_graf; } /*tex Get ready to start */ lmt_linebreak_state.fewest_demerits = 0; lmt_linebreak_state.actual_looseness = 0; lmt_linebreak_state.minimum_demerits = awful_bad; for (int i = very_loose_fit; i <= tight_fit; i++) { lmt_linebreak_state.minimal_demerits[i] = awful_bad; } /*tex This has been moved here: */ if (properties->adjust_spacing) { lmt_linebreak_state.adjust_spacing = properties->adjust_spacing; if (properties->adjust_spacing_step > 0) { lmt_linebreak_state.adjust_spacing_step = properties->adjust_spacing_step; lmt_linebreak_state.adjust_spacing_shrink = -properties->adjust_spacing_shrink; /* watch the sign */ lmt_linebreak_state.adjust_spacing_stretch = properties->adjust_spacing_stretch; } else { lmt_linebreak_state.adjust_spacing_step = 0; lmt_linebreak_state.adjust_spacing_shrink = 0; lmt_linebreak_state.adjust_spacing_stretch = 0; } properties->adjust_spacing = tex_checked_font_adjust( properties->adjust_spacing, lmt_linebreak_state.adjust_spacing_step, lmt_linebreak_state.adjust_spacing_shrink, lmt_linebreak_state.adjust_spacing_stretch ); } else { lmt_linebreak_state.adjust_spacing_step = 0; lmt_linebreak_state.adjust_spacing_shrink = 0; lmt_linebreak_state.adjust_spacing_stretch = 0; properties->adjust_spacing = adjust_spacing_off; } lmt_linebreak_state.current_font_step = -1; lmt_linebreak_state.max_shrink_ratio = -1; lmt_linebreak_state.max_stretch_ratio = -1; /*tex We compute the values of |easy_line| and the other local variables relating to line length when the |line_break| procedure is initializing itself. The orphan penalty injection is something new. It works backward so the first penalty in the list is injected first. If there is a penalty before a space we skip that space and also skip a penalty in the list. */ if (properties->orphan_penalties || properties->orphan_penalty) { halfword current = node_prev(properties->parfill_right_skip); if (current) { /*tex Skip over trailing glue and penalties. */ while (current) { switch (node_type(current)) { case glue_node: case penalty_node: current = node_prev(current); break; default: goto INJECT; } } INJECT: if (properties->orphan_penalties) { /*tex Inject specified penalties before spaces. */ int n = specification_count(properties->orphan_penalties); if (n > 0) { halfword i = 0; while (current) { if (node_type(current) == glue_node) { switch (node_subtype(current)) { case space_skip_glue: case xspace_skip_glue: case zero_space_skip_glue: current = tex_aux_inject_orphan_penalty(current, tex_get_specification_penalty(properties->orphan_penalties, ++i)); if (i == n) { goto ALLDONE; } else { break; } } } current = node_prev(current); } } } else { while (current) { if (node_type(current) == glue_node) { switch (node_subtype(current)) { case space_skip_glue: case xspace_skip_glue: case zero_space_skip_glue: tex_aux_inject_orphan_penalty(current, properties->orphan_penalty); goto ALLDONE; } } current = node_prev(current); } } } ALLDONE: ; } if (properties->par_shape) { int n = specification_count(properties->par_shape); if (n > 0) { if (specification_repeat(properties->par_shape)) { lmt_linebreak_state.last_special_line = max_halfword; } else { lmt_linebreak_state.last_special_line = n - 1; } lmt_linebreak_state.second_indent = tex_get_specification_indent(properties->par_shape, n); lmt_linebreak_state.second_width = tex_get_specification_width(properties->par_shape, n); lmt_linebreak_state.second_indent = swap_parshape_indent(properties->paragraph_dir, lmt_linebreak_state.second_indent, lmt_linebreak_state.second_width); } else { lmt_linebreak_state.last_special_line = 0; lmt_linebreak_state.second_width = properties->hsize; lmt_linebreak_state.second_indent = 0; } } else if (properties->hang_indent == 0) { lmt_linebreak_state.last_special_line = 0; lmt_linebreak_state.second_width = properties->hsize; lmt_linebreak_state.second_indent = 0; } else { halfword used_hang_indent = swap_hang_indent(properties->paragraph_dir, properties->hang_indent); /*tex Set line length parameters in preparation for hanging indentation. We compute the values of |easy_line| and the other local variables relating to line length when the |line_break| procedure is initializing itself. */ lmt_linebreak_state.last_special_line = abs(properties->hang_after); if (properties->hang_after < 0) { lmt_linebreak_state.first_width = properties->hsize - abs(used_hang_indent); if (used_hang_indent >= 0) { lmt_linebreak_state.first_indent = used_hang_indent; } else { lmt_linebreak_state.first_indent = 0; } lmt_linebreak_state.second_width = properties->hsize; lmt_linebreak_state.second_indent = 0; } else { lmt_linebreak_state.first_width = properties->hsize; lmt_linebreak_state.first_indent = 0; lmt_linebreak_state.second_width = properties->hsize - abs(used_hang_indent); if (used_hang_indent >= 0) { lmt_linebreak_state.second_indent = used_hang_indent; } else { lmt_linebreak_state.second_indent = 0; } } } if (properties->looseness == 0) { lmt_linebreak_state.easy_line = lmt_linebreak_state.last_special_line; } else { lmt_linebreak_state.easy_line = max_halfword; } lmt_linebreak_state.no_shrink_error_yet = 1; { halfword l = properties->left_skip; halfword r = properties->right_skip; lmt_linebreak_state.background[total_glue_amount] = glue_amount(l) + glue_amount(r); lmt_linebreak_state.background[total_stretch_amount] = 0; lmt_linebreak_state.background[total_fi_amount] = 0; lmt_linebreak_state.background[total_fil_amount] = 0; lmt_linebreak_state.background[total_fill_amount] = 0; lmt_linebreak_state.background[total_filll_amount] = 0; lmt_linebreak_state.background[total_stretch_amount + glue_stretch_order(l)] = glue_stretch(l); lmt_linebreak_state.background[total_stretch_amount + glue_stretch_order(r)] += glue_stretch(r); lmt_linebreak_state.background[total_shrink_amount] = tex_aux_checked_shrink(l) + tex_aux_checked_shrink(r); } if (properties->adjust_spacing) { lmt_linebreak_state.background[font_stretch_amount] = 0; lmt_linebreak_state.background[font_shrink_amount] = 0; lmt_linebreak_state.max_stretch_ratio = -1; lmt_linebreak_state.max_shrink_ratio = -1; lmt_linebreak_state.current_font_step = -1; lmt_packaging_state.previous_char_ptr = null; } /*tex Check for special treatment of last line of paragraph. The new algorithm for the last line requires that the stretchability |par_fill_skip| is infinite and the stretchability of |left_skip| plus |right_skip| is finite. */ lmt_linebreak_state.do_last_line_fit = 0; if (properties->last_line_fit > 0) { halfword q = lmt_linebreak_state.last_line_fill; if (glue_stretch(q) > 0 && glue_stretch_order(q) > normal_glue_order) { if (lmt_linebreak_state.background[total_fi_amount] == 0 && lmt_linebreak_state.background[total_fil_amount] == 0 && lmt_linebreak_state.background[total_fill_amount] == 0 && lmt_linebreak_state.background[total_filll_amount] == 0) { lmt_linebreak_state.do_last_line_fit = 1; lmt_linebreak_state.fill_width[fi_order] = 0; lmt_linebreak_state.fill_width[fil_order] = 0; lmt_linebreak_state.fill_width[fill_order] = 0; lmt_linebreak_state.fill_width[filll_order] = 0; lmt_linebreak_state.fill_width[glue_stretch_order(q) - fi_glue_order] = glue_stretch(q); } } } /*tex Initialize |dir_ptr| for |line_break|. */ if (lmt_linebreak_state.dir_ptr) { tex_flush_node_list(lmt_linebreak_state.dir_ptr); lmt_linebreak_state.dir_ptr = null; } /*tex Find optimal breakpoints. */ lmt_linebreak_state.threshold = properties->pretolerance; if (properties->tracing_paragraphs > 1) { tex_begin_diagnostic(); tex_print_str("[linebreak: original]"); tex_short_display(node_next(temp_head)); tex_end_diagnostic(); } if (lmt_linebreak_state.threshold >= 0) { if (properties->tracing_paragraphs > 0) { tex_begin_diagnostic(); tex_print_str("[linebreak: first pass]"); /* @firstpass */ } lmt_linebreak_state.second_pass = 0; lmt_linebreak_state.final_pass = 0; } else { lmt_linebreak_state.threshold = properties->tolerance; lmt_linebreak_state.second_pass = 1; lmt_linebreak_state.final_pass = (properties->emergency_stretch <= 0); if (properties->tracing_paragraphs > 0) { tex_begin_diagnostic(); } } if (callback_id) { tex_aux_initialize_show_break_node(callback_id); } { halfword cur_p = null; int pass = 0; while (++pass) { halfword first_p = node_next(temp_head); cur_p = first_p; if (lmt_linebreak_state.threshold > infinite_bad) { lmt_linebreak_state.threshold = infinite_bad; } if (callback_id) { tex_aux_start_show_break_node(callback_id, pass); } /*tex Create an active breakpoint representing the beginning of the paragraph. */ { halfword initial = tex_new_node(unhyphenated_node, (quarterword) decent_fit); node_next(initial) = active_head; active_break_node(initial) = null; active_line_number(initial) = cur_list.prev_graf + 1; active_total_demerits(initial) = 0; // default active_short(initial) = 0; // default active_glue(initial) = 0; // default // active_reserved(initial) = 0; // default node_next(active_head) = initial; } /*tex We now have created a cycle. */ tex_aux_set_target_to_source(properties->adjust_spacing, lmt_linebreak_state.active_width, lmt_linebreak_state.background); lmt_linebreak_state.passive = null; lmt_linebreak_state.printed_node = temp_head; lmt_linebreak_state.pass_number = 0; lmt_print_state.font_in_short_display = null_font; /*tex Create an active breakpoint representing the beginning of the paragraph. */ /* lmt_linebreak_state.auto_breaking = 1; */ /* gone */ // cur_p = node_next(temp_head); /*tex Initialize with first (or current) |par| node. */ if (cur_p && node_type(cur_p) == par_node) { node_prev(cur_p) = temp_head; lmt_linebreak_state.internal_penalty_interline = tex_get_local_interline_penalty(cur_p); lmt_linebreak_state.internal_penalty_broken = tex_get_local_broken_penalty(cur_p); lmt_linebreak_state.init_internal_left_box = par_box_left(cur_p); lmt_linebreak_state.init_internal_left_box_width = tex_get_local_left_width(cur_p); lmt_linebreak_state.internal_right_box = par_box_right(cur_p); lmt_linebreak_state.internal_right_box_width = tex_get_local_right_width(cur_p); lmt_linebreak_state.internal_middle_box = par_box_middle(cur_p); } else { lmt_linebreak_state.internal_penalty_interline = 0; lmt_linebreak_state.internal_penalty_broken = 0; lmt_linebreak_state.init_internal_left_box = null; lmt_linebreak_state.init_internal_left_box_width = 0; lmt_linebreak_state.internal_right_box = null; lmt_linebreak_state.internal_right_box_width = 0; lmt_linebreak_state.internal_middle_box = null; } lmt_linebreak_state.internal_left_box = lmt_linebreak_state.init_internal_left_box; lmt_linebreak_state.internal_left_box_width = lmt_linebreak_state.init_internal_left_box_width; lmt_packaging_state.previous_char_ptr = null; // first_p = cur_p; /*tex To access the first node of paragraph as the first active node has |break_node = null|. Determine legal breaks: As we move through the hlist, we need to keep the |active_width| array up to date, so that the badness of individual lines is readily calculated by |try_break|. It is convenient to use the short name |active_width [1]| for the component of active width that represents real width as opposed to glue. Advance |cur_p| to the node following the present string of characters. The code that passes over the characters of words in a paragraph is part of \TEX's inner loop, so it has been streamlined for speed. We use the fact that |\parfillskip| glue appears at the end of each paragraph; it is therefore unnecessary to check if |vlink (cur_p) = null| when |cur_p| is a character node. */ while (cur_p && (node_next(active_head) != active_head)) { /* we check the cycle */ switch (node_type(cur_p)) { case glyph_node: /* why ex here and not in add/sub disc glyphs */ lmt_linebreak_state.active_width[total_glue_amount] += tex_glyph_width_ex(cur_p); // ex if (properties->adjust_spacing && tex_aux_check_expand_pars(properties->adjust_spacing_step, glyph_font(cur_p))) { lmt_packaging_state.previous_char_ptr = cur_p; lmt_linebreak_state.active_width[font_stretch_amount] += tex_char_stretch(cur_p); lmt_linebreak_state.active_width[font_shrink_amount] += tex_char_shrink(cur_p); } break; case hlist_node: case vlist_node: lmt_linebreak_state.active_width[total_glue_amount] += box_width(cur_p); break; case rule_node: lmt_linebreak_state.active_width[total_glue_amount] += rule_width(cur_p); break; case dir_node: /*tex Adjust the dir stack for the |line_break| routine. */ line_break_dir = tex_update_dir_state(cur_p, properties->paragraph_dir); break; case par_node: /*tex Advance past a |par| node. */ lmt_linebreak_state.internal_penalty_interline = tex_get_local_interline_penalty(cur_p); lmt_linebreak_state.internal_penalty_broken = tex_get_local_broken_penalty(cur_p); lmt_linebreak_state.internal_left_box = par_box_left(cur_p); lmt_linebreak_state.internal_left_box_width = tex_get_local_left_width(cur_p); lmt_linebreak_state.internal_right_box = par_box_right(cur_p); lmt_linebreak_state.internal_right_box_width = tex_get_local_right_width(cur_p); lmt_linebreak_state.internal_middle_box = par_box_middle(cur_p); break; case glue_node: /*tex If node |cur_p| is a legal breakpoint, call |try_break|; then update the active widths by including the glue in |glue_ptr(cur_p)|. When node |cur_p| is a glue node, we look at the previous to see whether or not a breakpoint is legal at |cur_p|, as explained above. We only break after certain nodes (see texnodes.h), a font related kern and a dir node when |\breakafterdirmode = 1|. */ if (tex_has_glue_option(cur_p, glue_option_no_auto_break)) { /*tex Glue in math is not a valid breakpoint, unless we permit it. */ } else if (tex_is_par_init_glue(cur_p)) { /*tex Of course we don't break here. */ } else if (tex_aux_valid_glue_break(cur_p)) { tex_aux_try_break(properties, tex_aux_upcoming_penalty(cur_p), unhyphenated_node, first_p, cur_p, callback_id, pass); } lmt_linebreak_state.active_width[total_glue_amount] += glue_amount(cur_p); lmt_linebreak_state.active_width[total_stretch_amount + glue_stretch_order(cur_p)] += glue_stretch(cur_p); lmt_linebreak_state.active_width[total_shrink_amount] += tex_aux_checked_shrink(cur_p); break; case kern_node: switch (node_subtype(cur_p)) { case explicit_kern_subtype: case italic_kern_subtype: { /* there used to a ! is_char_node(node_next(cur_p)) test */ halfword nxt = node_next(cur_p); if (nxt && node_type(nxt) == glue_node && ! tex_has_glue_option(nxt, glue_option_no_auto_break)) { tex_aux_try_break(properties, 0, unhyphenated_node, first_p, cur_p, callback_id, pass); } } break; case font_kern_subtype: if (properties->adjust_spacing == adjust_spacing_full) { lmt_linebreak_state.active_width[font_stretch_amount] += tex_kern_stretch(cur_p); lmt_linebreak_state.active_width[font_shrink_amount] += tex_kern_shrink(cur_p); } break; } lmt_linebreak_state.active_width[total_glue_amount] += kern_amount(cur_p); break; case disc_node: /*tex Try to break after a discretionary fragment, then |goto done5|. The following code knows that discretionary texts contain only character nodes, kern nodes, box nodes, and rule nodes. This branch differs a bit from older engines because in \LUATEX\ we already have hyphenated the list. This means that we need to skip automatic disc nodes. Or better, we need to treat discretionaries and explicit hyphens always, even in the first pass. We used to have |init_disc| followed by |select disc| variants where the |select_disc|s were handled by the leading |init_disc|. The question is: should we bother about select nodes? Knuth indicates in the original source that only a very few cases need hyphenation so the exceptional case of >2 char ligatures having hyphenation points in between is rare. We'd better have proper compound word handling. Keep in mind that these (old) init and select subtypes always came in isolated pairs and that they only were meant for the simple (enforced) hyphenation discretionaries. Therefore, this feature has been dropped from \LUAMETATEX. It not only makes the code simpler, it also avoids having code on board for border cases that even when dealt with are suboptimal. It's better to have nothing that something fuzzy. It also makes dealing with (intermediate) node lists easier. If I want something like this it should be okay for any situation. */ if (force_check_hyphenation || lmt_linebreak_state.second_pass || (node_subtype(cur_p) != syllable_discretionary_code)) { halfword actual_penalty = disc_penalty(cur_p); halfword pre = disc_pre_break_head(cur_p); tex_aux_reset_disc_target(properties->adjust_spacing, lmt_linebreak_state.disc_width); if (pre) { tex_aux_add_to_widths(pre, properties->adjust_spacing, properties->adjust_spacing_step, lmt_linebreak_state.disc_width); tex_aux_add_disc_source_to_target(properties->adjust_spacing, lmt_linebreak_state.active_width, lmt_linebreak_state.disc_width); tex_aux_try_break(properties, actual_penalty, hyphenated_node, first_p, cur_p, callback_id, pass); tex_aux_sub_disc_target_from_source(properties->adjust_spacing, lmt_linebreak_state.active_width, lmt_linebreak_state.disc_width); } else { /*tex trivial pre-break */ tex_aux_try_break(properties, actual_penalty, hyphenated_node, first_p, cur_p, callback_id, pass); } } tex_aux_add_to_widths(disc_no_break_head(cur_p), properties->adjust_spacing, properties->adjust_spacing_step, lmt_linebreak_state.active_width); break; case penalty_node: tex_aux_try_break(properties, penalty_amount(cur_p), unhyphenated_node, first_p, cur_p, callback_id, pass); break; case math_node: { /* there used to a ! is_char_node(node_next(cur_p)) test */ int finishing = node_subtype(cur_p) == end_inline_math; // lmt_linebreak_state.auto_breaking = finishing; if (tex_math_glue_is_zero(cur_p) || tex_ignore_math_skip(cur_p)) { /*tex When we end up here we assume |\mathsurround| but we only check for a break when we're ending math. Maybe this is something we need to open up. The math specific penalty only kicks in when we break. */ if (finishing && node_type(node_next(cur_p)) == glue_node) { tex_aux_try_break(properties, math_penalty(cur_p), unhyphenated_node, first_p, cur_p, callback_id, pass); } lmt_linebreak_state.active_width[total_glue_amount] += math_surround(cur_p); } else { /*tex This one does quite some testing, is that still needed? */ if (finishing && tex_aux_valid_glue_break(cur_p)) { tex_aux_try_break(properties, math_penalty(cur_p), unhyphenated_node, first_p, cur_p, callback_id, pass); } lmt_linebreak_state.active_width[total_glue_amount] += math_amount(cur_p); lmt_linebreak_state.active_width[total_stretch_amount + math_stretch_order(cur_p)] += math_stretch(cur_p); lmt_linebreak_state.active_width[total_shrink_amount] += tex_aux_checked_shrink(cur_p); } } break; case boundary_node: case whatsit_node: case mark_node: case insert_node: case adjust_node: /*tex Advance past these nodes in the |line_break| loop. */ break; default: tex_formatted_error("parbuilder", "weird node %d in paragraph", node_type(cur_p)); } cur_p = node_next(cur_p); } if (! cur_p) { /*tex Try the final line break at the end of the paragraph, and |goto done| if the desired breakpoints have been found. The forced line break at the paragraph's end will reduce the list of breakpoints so that all active nodes represent breaks at |cur_p = null|. On the first pass, we insist on finding an active node that has the correct \quote {looseness.} On the final pass, there will be at least one active node, and we will match the desired looseness as well as we can. The global variable |best_bet| will be set to the active node for the best way to break the paragraph, and a few other variables are used to help determine what is best. */ tex_aux_try_break(properties, eject_penalty, hyphenated_node, first_p, cur_p, callback_id, pass); if (node_next(active_head) != active_head) { /*tex Find an active node with fewest demerits. */ halfword r = node_next(active_head); lmt_linebreak_state.fewest_demerits = awful_bad; do { if ((node_type(r) != delta_node) && (active_total_demerits(r) < lmt_linebreak_state.fewest_demerits)) { lmt_linebreak_state.fewest_demerits = active_total_demerits(r); lmt_linebreak_state.best_bet = r; } r = node_next(r); } while (r != active_head); lmt_linebreak_state.best_line = active_line_number(lmt_linebreak_state.best_bet); /*tex Find an active node with fewest demerits. */ if (properties->looseness == 0) { goto DONE; } else { /*tex Find the best active node for the desired looseness. The adjustment for a desired looseness is a slightly more complicated version of the loop just considered. Note that if a paragraph is broken into segments by displayed equations, each segment will be subject to the looseness calculation, independently of the other segments. */ r = node_next(active_head); // can be local lmt_linebreak_state.actual_looseness = 0; do { if (node_type(r) != delta_node) { lmt_linebreak_state.line_difference = active_line_number(r) - lmt_linebreak_state.best_line; if (((lmt_linebreak_state.line_difference < lmt_linebreak_state.actual_looseness) && (properties->looseness <= lmt_linebreak_state.line_difference)) || ((lmt_linebreak_state.line_difference > lmt_linebreak_state.actual_looseness) && (properties->looseness >= lmt_linebreak_state.line_difference))) { lmt_linebreak_state.best_bet = r; lmt_linebreak_state.actual_looseness = lmt_linebreak_state.line_difference; lmt_linebreak_state.fewest_demerits = active_total_demerits(r); } else if ((lmt_linebreak_state.line_difference == lmt_linebreak_state.actual_looseness) && (active_total_demerits(r) < lmt_linebreak_state.fewest_demerits)) { lmt_linebreak_state.best_bet = r; lmt_linebreak_state.fewest_demerits = active_total_demerits(r); } } r = node_next(r); } while (r != active_head); lmt_linebreak_state.best_line = active_line_number(lmt_linebreak_state.best_bet); /*tex Find the best active node for the desired looseness. */ if ((lmt_linebreak_state.actual_looseness == properties->looseness) || lmt_linebreak_state.final_pass) { goto DONE; } } } } else { /*tex So we have cycled: |node_next(active_head) == active_head|. */ } /*tex Clean up the memory by removing the break nodes. */ cur_p = tex_aux_clean_up_the_memory(cur_p); if (! lmt_linebreak_state.second_pass) { if (properties->tracing_paragraphs > 0) { tex_print_format("%l[linebreak: second pass]"); /* @secondpass */; } lmt_linebreak_state.threshold = properties->tolerance; lmt_linebreak_state.second_pass = 1; lmt_linebreak_state.final_pass = (properties->emergency_stretch <= 0); } else { /*tex If at first you do not succeed, then: */ if (properties->tracing_paragraphs > 0) { tex_print_format("%l[linebreak: emergency pass]"); /* @emergencypass */ } lmt_linebreak_state.background[total_stretch_amount] += properties->emergency_stretch; lmt_linebreak_state.final_pass = 1; } if (callback_id) { tex_aux_stop_show_break_node(callback_id); } } DONE: if (properties->tracing_paragraphs > 0) { tex_end_diagnostic(); /*tex This is a bit weird, as only here: |normalize_selector()| while we have diagnostics all over the place. */ } if (lmt_linebreak_state.do_last_line_fit) { /*tex Adjust the final line of the paragraph; here we either reset |do_last_line_fit| or adjust the |par_fill_skip| glue. */ if (active_short(lmt_linebreak_state.best_bet) == 0) { lmt_linebreak_state.do_last_line_fit = 0; } else { glue_amount(lmt_linebreak_state.last_line_fill) += (active_short(lmt_linebreak_state.best_bet) - active_glue(lmt_linebreak_state.best_bet)); glue_stretch(lmt_linebreak_state.last_line_fill) = 0; } } /*tex Break the paragraph at the chosen. Once the best sequence of breakpoints has been found (hurray), we call on the procedure |post_line_break| to finish the remainder of the work. By introducing this subprocedure, we are able to keep |line_break| from getting extremely long. The first thing |ext_post_line_break| does is reset |dir_ptr|. */ tex_flush_node_list(lmt_linebreak_state.dir_ptr); lmt_linebreak_state.dir_ptr = null; /*tex Here we still have a temp node as head. */ tex_aux_post_line_break(properties, line_break_dir, callback_id); /*tex Clean up memory by removing the break nodes (maybe: |tex_flush_node_list(cur_p);|). */ tex_aux_clean_up_the_memory(cur_p); } if (callback_id) { tex_aux_wrapup_show_break_node(callback_id); } } void tex_get_linebreak_info(int *f, int *a) { *f = lmt_linebreak_state.fewest_demerits; *a = lmt_linebreak_state.actual_looseness; } /*tex So far we have gotten a little way into the |line_break| routine, having covered its important |try_break| subroutine. Now let's consider the rest of the process. The main loop of |line_break| traverses the given hlist, starting at |vlink (temp_head)|, and calls |try_break| at each legal breakpoint. A variable called |auto_breaking| is set to true except within math formulas, since glue nodes are not legal breakpoints when they appear in formulas. The current node of interest in the hlist is pointed to by |cur_p|. Another variable, |prev_p|, is usually one step behind |cur_p|, but the real meaning of |prev_p| is this: If |type (cur_p) = glue_node| then |cur_p| is a legal breakpoint if and only if |auto_breaking| is true and |prev_p| does not point to a glue node, penalty node, explicit kern node, or math node. The total number of lines that will be set by |post_line_break| is |best_line - prev_graf - 1|. The last breakpoint is specified by |break_node (best_bet)|, and this passive node points to the other breakpoints via the |prev_break| links. The finishing-up phase starts by linking the relevant passive nodes in forward order, changing |prev_break| to |next_break|. (The |next_break| fields actually reside in the same memory space as the |prev_break| fields did, but we give them a new name because of their new significance.) Then the lines are justified, one by one. The |post_line_break| must also keep an dir stack, so that it can output end direction instructions at the ends of lines and begin direction instructions at the beginnings of lines. */ /*tex The new name for |prev_break| after links are reversed: */ # define passive_next_break passive_prev_break /*tex The |int|s are actually |halfword|s or |scaled|s. */ static void tex_aux_trace_penalty(const char *what, int line, int index, halfword penalty, halfword total) { if (tracing_penalties_par > 0) { tex_begin_diagnostic(); tex_print_format("[linebreak: %s penalty, line %i, index %i, adding %i, total %i]", what, line, index, penalty, total); tex_end_diagnostic(); } } static void tex_aux_post_line_break(const line_break_properties *properties, halfword line_break_dir, int callback_id) { /*tex temporary registers for list manipulation */ halfword q, r; halfword ls = null; halfword rs = null; /*tex was a break at glue? */ int glue_break; /*tex are we in some shape */ int shaping = 0; /*tex was the current break at a discretionary node? */ int disc_break; /*tex and did it have a nonempty post-break part? */ int post_disc_break; /*tex width of line number |cur_line| */ scaled cur_width; /*tex left margin of line number |cur_line| */ scaled cur_indent; /*tex |cur_p| will become the first breakpoint; */ halfword cur_p = null; /*tex the current line number being justified */ halfword cur_line; /*tex this saves calculations: */ int last_line = 0; int first_line = 0; /*tex the current direction: */ lmt_linebreak_state.dir_ptr = cur_list.direction_stack; /*tex Reverse the links of the relevant passive nodes, setting |cur_p| to the first breakpoint. The job of reversing links in a list is conveniently regarded as the job of taking items off one stack and putting them on another. In this case we take them off a stack pointed to by |q| and having |prev_break| fields; we put them on a stack pointed to by |cur_p| and having |next_break| fields. Node |r| is the passive node being moved from stack to stack. */ if (callback_id) { tex_aux_collect_show_break_node(callback_id); } q = active_break_node(lmt_linebreak_state.best_bet); do { r = q; q = passive_prev_break(q); passive_next_break(r) = cur_p; cur_p = r; } while (q); if (callback_id) { halfword p = cur_p; while (p) { tex_aux_list_break_node(p, callback_id); p = passive_next_break(p); } } /*tex prevgraf + 1 */ cur_line = cur_list.prev_graf + 1; do { /*tex Justify the line ending at breakpoint |cur_p|, and append it to the current vertical list, together with associated penalties and other insertions. The current line to be justified appears in a horizontal list starting at |vlink (temp_head)| and ending at |cur_break (cur_p)|. If |cur_break (cur_p)| is a glue node, we reset the glue to equal the |right_skip| glue; otherwise we append the |right_skip| glue at the right. If |cur_break (cur_p)| is a discretionary node, we modify the list so that the discretionary break is compulsory, and we set |disc_break| to |true|. We also append the |left_skip| glue at the left of the line, unless it is zero. */ /*tex We want to get rid of it. */ halfword cur_disc = null; /*tex Local left and right boxes come from \OMEGA\ but have been adapted and extended. */ halfword leftbox = null; halfword rightbox = null; halfword middlebox = null; if (lmt_linebreak_state.dir_ptr) { /*tex Insert dir nodes at the beginning of the current line. */ for (halfword q = lmt_linebreak_state.dir_ptr; q; q = node_next(q)) { halfword tmp = tex_new_dir(normal_dir_subtype, dir_direction(q)); halfword nxt = node_next(temp_head); tex_attach_attribute_list_copy(tmp, nxt ? nxt : temp_head); tex_couple_nodes(temp_head, tmp); /*tex |\break\par| */ tex_try_couple_nodes(tmp, nxt); } tex_flush_node_list(lmt_linebreak_state.dir_ptr); lmt_linebreak_state.dir_ptr = null; } /*tex Modify the end of the line to reflect the nature of the break and to include |\rightskip|; also set the proper value of |disc_break|. At the end of the following code, |q| will point to the final node on the list about to be justified. In the meanwhile |r| will point to the node we will use to insert end-of-line stuff after. |q == null| means we use the final position of |r|. */ /*tex begin mathskip code */ q = temp_head; while (q) { switch (node_type(q)) { case glyph_node: goto DONE; case hlist_node: if (node_subtype(q) == indent_list) { break; } else { goto DONE; } case glue_node: if (tex_is_par_init_glue(q)) { break; } else { goto DONE; } case kern_node: if (node_subtype(q) != explicit_kern_subtype && node_subtype(q) != italic_kern_subtype) { goto DONE; } else { break; } case math_node: math_surround(q) = 0; tex_reset_math_glue_to_zero(q); goto DONE; default: if (non_discardable(q)) { goto DONE; } else { break; } } q = node_next(q); } DONE: /*tex end mathskip code */ r = passive_cur_break(cur_p); q = null; disc_break = 0; post_disc_break = 0; glue_break = 0; if (r) { switch (node_type(r)) { case glue_node: tex_copy_glue_values(r, properties->right_skip); node_subtype(r) = right_skip_glue; glue_break = 1; /*tex |q| refers to the last node of the line */ q = r; rs = q; r = node_prev(r); /*tex |r| refers to the node after which the dir nodes should be closed */ break; case disc_node: { halfword prv = node_prev(r); halfword nxt = node_next(r); halfword h = disc_no_break_head(r); if (h) { tex_flush_node_list(h); disc_no_break_head(r) = null; disc_no_break_tail(r) = null; } h = disc_pre_break_head(r); if (h) { halfword t = disc_pre_break_tail(r); tex_set_discpart(r, h, t, glyph_discpart_pre); tex_couple_nodes(prv, h); tex_couple_nodes(t, r); disc_pre_break_head(r) = null; disc_pre_break_tail(r) = null; } h = disc_post_break_head(r); if (h) { halfword t = disc_post_break_tail(r); tex_set_discpart(r, h, t, glyph_discpart_post); tex_couple_nodes(r, h); tex_couple_nodes(t, nxt); disc_post_break_head(r) = null; disc_post_break_tail(r) = null; post_disc_break = 1; } cur_disc = r; disc_break = 1; } break; case kern_node: kern_amount(r) = 0; break; case math_node : math_surround(r) = 0; tex_reset_math_glue_to_zero(r); break; } } else { /*tex Again a tail run ... maybe combine. */ // for (r = temp_head; node_next(r); r = node_next(r)); r = tex_tail_of_node_list(temp_head); /*tex Now we're at the end. */ if (r == properties->parfill_right_skip) { /*tex This should almost always be true... */ q = r; /*tex |q| refers to the last node of the line (and paragraph) */ r = node_prev(r); } /*tex |r| refers to the node after which the dir nodes should be closed */ } /*tex Adjust the dir stack based on dir nodes in this line. */ line_break_dir = tex_sanitize_dir_state(node_next(temp_head), passive_cur_break(cur_p), properties->paragraph_dir); /*tex Insert dir nodes at the end of the current line. */ r = tex_complement_dir_state(r); /*tex Modify the end of the line to reflect the nature of the break and to include |\rightskip|; also set the proper value of |disc_break|; Also put the |\leftskip| glue at the left and detach this line. The following code begins with |q| at the end of the list to be justified. It ends with |q| at the beginning of that list, and with |node_next(temp_head)| pointing to the remainder of the paragraph, if any. Now [q] refers to the last node on the line and therefore the rightmost breakpoint. The only exception is the case of a discretionary break with non-empty |pre_break|, then |q| s been changed to the last node of the |pre_break| list. If the par ends with a |\break| command, the last line is utterly empty. That is the case of |q == temp_head|. This code needs to be cleaned up as we now have protrusion and boxes at the edges to deal with. Old hybrid code. */ leftbox = tex_use_local_boxes(passive_left_box(cur_p), local_left_box_code); rightbox = tex_use_local_boxes(passive_right_box(cur_p), local_right_box_code); middlebox = tex_use_local_boxes(passive_middle_box(cur_p), local_middle_box_code); /*tex First we append the right box. It is part of the content so inside the skips. */ if (rightbox) { halfword nxt = node_next(r); tex_couple_nodes(r, rightbox); tex_try_couple_nodes(rightbox, nxt); r = rightbox; } if (middlebox) { /*tex These middle boxes might become more advanced as we can process them by a pass over the line so that we retain the spot but then, we also loose that with left and right, so why bother. It would also complicate uniqueness. */ halfword nxt = node_next(r); tex_couple_nodes(r, middlebox); tex_try_couple_nodes(middlebox, nxt); r = middlebox; } if (! q) { q = r; } if (q != temp_head && properties->protrude_chars) { if (line_break_dir == dir_righttoleft && properties->protrude_chars == protrude_chars_advanced) { halfword p = q; halfword l = null; /*tex Backtrack over the last zero glues and dirs. */ while (p) { switch (node_type(p)) { case dir_node: if (node_subtype(p) != cancel_dir_subtype) { goto DONE1; } else { break; } case glue_node: if (glue_amount(p)) { goto DONE3; } else { break; } case glyph_node: goto DONE1; default: goto DONE3; } p = node_prev(p); } DONE1: /*tex When |p| is non zero we have something. */ while (p) { switch (node_type(p)) { case glyph_node: l = p ; break; case glue_node: if (glue_amount(p)) { l = null; } break; case dir_node: if (dir_direction(p) != dir_righttoleft) { goto DONE3; } else { goto DONE2; } case par_node: goto DONE2; case temp_node: /*tex Go on. */ break; default: l = null; break; } p = node_prev(p); } DONE2: /*tex Time for action. */ if (l && p) { scaled w = tex_char_protrusion(l, right_margin_kern_subtype); halfword k = tex_new_kern_node(-w, right_margin_kern_subtype); tex_attach_attribute_list_copy(k, l); tex_couple_nodes(p, k); tex_couple_nodes(k, l); } } else { scaled w = 0; halfword p, ptmp; if (disc_break && (node_type(q) == glyph_node || node_type(q) != disc_node)) { /*tex |q| is reset to the last node of |pre_break| */ p = q; } else { /*tex get |node_next(p) = q| */ p = node_prev(q); } ptmp = p; p = tex_aux_find_protchar_right(node_next(temp_head), p); w = tex_char_protrusion(p, right_margin_kern_subtype); if (w && lmt_packaging_state.last_rightmost_char) { /*tex we have found a marginal kern, append it after |ptmp| */ halfword k = tex_new_kern_node(-w, right_margin_kern_subtype); tex_attach_attribute_list_copy(k, p); tex_try_couple_nodes(k, node_next(ptmp)); tex_couple_nodes(ptmp, k); if (ptmp == q) { q = node_next(q); } } } } DONE3: /*tex If |q| was not a breakpoint at glue and has been reset to |rightskip| then we append |rightskip| after |q| now? */ if (glue_break) { /*tex A rightskip has already been added. */ } else { /*tex We add one, even when zero. */ halfword g = tex_new_glue_node(properties->right_skip ? properties->right_skip : zero_glue, right_skip_glue); tex_attach_attribute_list_copy(g, q); /* or next of it? or q */ tex_try_couple_nodes(g, node_next(q)); tex_couple_nodes(q, g); q = g; } rs = q; /*tex More preparations. */ r = node_next(q); node_next(q) = null; q = node_next(temp_head); tex_try_couple_nodes(temp_head, r); /*tex Now we prepend the left box. It is part of the content so inside the skips. */ if (leftbox) { halfword nxt = node_next(q); tex_couple_nodes(leftbox, q); q = leftbox; if (nxt && (cur_line == cur_list.prev_graf + 1) && (node_type(nxt) == hlist_node) && ! box_list(nxt)) { /* what is special about an empty hbox, needs checking */ q = node_next(q); tex_try_couple_nodes(leftbox, node_next(nxt)); tex_try_couple_nodes(nxt, leftbox); } } /*tex At this point |q| is the leftmost node; all discardable nodes have been discarded. */ if (properties->protrude_chars) { if (line_break_dir == dir_righttoleft && properties->protrude_chars == protrude_chars_advanced) { halfword p = tex_aux_find_protchar_left(q, 0); halfword w = tex_char_protrusion(p, right_margin_kern_subtype); if (w && lmt_packaging_state.last_leftmost_char) { halfword k = tex_new_kern_node(-w, right_margin_kern_subtype); tex_attach_attribute_list_copy(k, p); tex_couple_nodes(k, q); q = k; } } else { halfword p = tex_aux_find_protchar_left(q, 0); halfword w = tex_char_protrusion(p, left_margin_kern_subtype); if (w && lmt_packaging_state.last_leftmost_char) { halfword k = tex_new_kern_node(-w, left_margin_kern_subtype); tex_attach_attribute_list_copy(k, p); tex_couple_nodes(k, q); q = k; } } } /*tex Fix a possible mess up. */ if (node_type(q) == par_node && ! tex_is_start_of_par_node(q)) { node_subtype(q) = hmode_par_par_subtype ; } /*tex Put the |\leftskip| glue at the left and detach this line. Call the packaging subroutine, setting |just_box| to the justified box. Now|q| points to the hlist that represents the current line of the paragraph. We need to compute the appropriate line width, pack the line into a box of this size, and shift the box by the appropriate amount of indentation. In \LUAMETATEX\ we always add the leftskip. */ r = tex_new_glue_node(properties->left_skip, left_skip_glue); tex_attach_attribute_list_copy(r, q); tex_couple_nodes(r, q); q = r; ls = q; /*tex We have these |par| nodes that, when we have callbacks, kind of polute the list. Let's get rid of them now. We could have done this in previous loops but for the sake of clearity we do it here. That way we keep the existing code as it is in older engines. Okay, I might collapse it eventually. This is code that has been prototyped using \LUA. */ if (cur_line > lmt_linebreak_state.last_special_line) { // && (! (properties->par_shape && specification_repeat(properties->par_shape)))) { cur_width = lmt_linebreak_state.second_width; cur_indent = lmt_linebreak_state.second_indent; } else if (properties->par_shape) { if (specification_count(properties->par_shape)) { cur_indent = tex_get_specification_indent(properties->par_shape, cur_line); cur_width = tex_get_specification_width(properties->par_shape, cur_line); cur_indent = swap_parshape_indent(properties->paragraph_dir, cur_indent, cur_width); } else { cur_width = lmt_linebreak_state.first_width; cur_indent = lmt_linebreak_state.first_indent; } } else { cur_width = lmt_linebreak_state.first_width; cur_indent = lmt_linebreak_state.first_indent; } /*tex When we have a left hang, the width is the (hsize-hang) and there is a shift if hang applied. The overall linewidth is hsize. When we vbox the result, we get a box with width hsize. When we have a right hang, the width is the (hsize-hang) and therefore we end up with a box that is less that the hsize. When we vbox the result, we get a box with width hsize minus the hang, so definitely not consistent with the previous case. In both cases we can consider the hang to be at the edge, simply because the whole lot gets packaged and then shift gets applied. Although, for practical reasons we could decide to put it after the left and before the right skips, which actually opens up some options. Anyway, after a period of nasty heuristics we can now do a better job because we still have the information that we started with. */ first_line = rs && (cur_line == 1) && properties->parinit_left_skip && properties->parinit_right_skip; if (first_line) { halfword n = node_next(properties->parinit_left_skip); while (n) { if (n == properties->parinit_right_skip) { tex_couple_nodes(node_prev(n), node_next(n)); tex_couple_nodes(node_prev(rs), n); tex_couple_nodes(n, rs); break; } else { n = node_next(n); } } if (! n && normalize_line_mode_par) { /*tex For the moment: */ tex_normal_warning("tex", "right parinit skip is gone"); } } last_line = ls && (cur_line + 1 == lmt_linebreak_state.best_line) && properties->parfill_left_skip && properties->parfill_right_skip; if (last_line) { halfword n = node_prev(properties->parfill_right_skip); while (n) { if (n == properties->parfill_left_skip) { tex_couple_nodes(node_prev(n), node_next(n)); tex_couple_nodes(n, node_next(ls)); tex_couple_nodes(ls, n); break; } else { n = node_prev(n); } } if (! n && normalize_line_mode_par) { /*tex For the moment: */ tex_normal_warning("tex", "left parfill skip is gone"); } } /*tex Some housekeeping. */ lmt_packaging_state.post_adjust_tail = post_adjust_head; lmt_packaging_state.pre_adjust_tail = pre_adjust_head; lmt_packaging_state.post_migrate_tail = post_migrate_head; lmt_packaging_state.pre_migrate_tail = pre_migrate_head; /*tex A bonus feature. */ if (normalize_line_mode_permitted(normalize_line_mode_par, flatten_discretionaries_mode)) { int count = 0; q = tex_flatten_discretionaries(q, &count, 0); /* there is no need to nest */ cur_disc = null; if (properties->tracing_paragraphs > 1) { tex_begin_diagnostic(); tex_print_format("[linebreak: flatten, line %i, count %i]", cur_line, count); tex_end_diagnostic(); } } /*tex Finally we pack the lot. */ shaping = 0; if (normalize_line_mode_permitted(normalize_line_mode_par, normalize_line_mode)) { halfword head = q; halfword tail = rs ? rs : head; halfword lefthang = 0; halfword righthang = 0; // we already have the tail somewhere while (node_next(tail)) { tail = node_next(tail); } if (properties->par_shape) { int n = specification_count(properties->par_shape); if (n > 0) { if (specification_repeat(properties->par_shape)) { n = cur_line; } else { n = cur_line > n ? n : cur_line; } lefthang = tex_get_specification_indent(properties->par_shape, n); righthang = properties->hsize - lefthang - tex_get_specification_width(properties->par_shape, n); // lefthang = swap_parshape_indent(paragraph_dir, lefthang, width); // or so } } else if (properties->hang_after) { if (properties->hang_after > 0 && cur_line > properties->hang_after) { if (properties->hang_indent < 0) { righthang = -properties->hang_indent; } if (properties->hang_indent > 0) { lefthang = properties->hang_indent; } } else if (properties->hang_after < 0 && cur_line <= -properties->hang_after) { if (properties->hang_indent < 0) { righthang = -properties->hang_indent; } if (properties->hang_indent > 0) { lefthang = properties->hang_indent; } } } shaping = (lefthang || righthang); lmt_linebreak_state.just_box = tex_hpack(head, cur_width, properties->adjust_spacing ? packing_linebreak : packing_exactly, (singleword) properties->paragraph_dir, holding_none_option); // attach_attribute_list_copy(linebreak_state.just_box, properties->initial_par); if (normalize_line_mode_permitted(normalize_line_mode_par, flatten_h_leaders_mode)) { tex_flatten_leaders(lmt_linebreak_state.just_box, NULL); } if (node_type(tail) != glue_node || node_subtype(tail) != right_skip_glue) { halfword rs = tex_new_glue_node((properties->right_skip ? properties->right_skip : zero_glue), right_skip_glue); tex_attach_attribute_list_copy(rs, tail); tex_try_couple_nodes(rs, node_next(q)); tex_couple_nodes(tail, rs); tail = rs; } { halfword lh = tex_new_glue_node(zero_glue, left_hang_skip_glue); halfword rh = tex_new_glue_node(zero_glue, right_hang_skip_glue); glue_amount(lh) = lefthang; glue_amount(rh) = righthang; tex_attach_attribute_list_copy(lh, head); tex_attach_attribute_list_copy(rh, tail); tex_try_couple_nodes(lh, head); tex_try_couple_nodes(tail, rh); head = lh; tail = rh; } /*tex This is kind of special. Instead of using |cur_width| also on an overfull box as well as shifts, we want \quote {real} dimensions. A disadvantage is that we need to adapt analyzers that assume this correction not being there (unpack and repack). So we have a flag to control it. */ if (normalize_line_mode_permitted(normalize_line_mode_par, clip_width_mode)) { if (lmt_packaging_state.last_overshoot) { halfword g = tex_new_glue_node(zero_glue, correction_skip_glue); glue_amount(g) = -lmt_packaging_state.last_overshoot; tex_attach_attribute_list_copy(g, rs); tex_try_couple_nodes(node_prev(rs), g); tex_try_couple_nodes(g, rs); } box_width(lmt_linebreak_state.just_box) = properties->hsize; } box_list(lmt_linebreak_state.just_box) = head; q = head; /*tex So only callback when we normalize. */ if (leftbox || rightbox || middlebox) { halfword linebox = lmt_linebreak_state.just_box; lmt_local_box_callback( linebox, leftbox, rightbox, middlebox, cur_line, tex_effective_glue(linebox, properties->left_skip), tex_effective_glue(linebox, properties->right_skip), lefthang, righthang, cur_indent, (first_line && properties->parinit_left_skip) ? tex_effective_glue(linebox, properties->parinit_left_skip) : null, (first_line && properties->parinit_right_skip) ? tex_effective_glue(linebox, properties->parinit_right_skip) : null, (last_line && properties->parfill_left_skip) ? tex_effective_glue(linebox, properties->parfill_left_skip) : null, (last_line && properties->parfill_right_skip) ? tex_effective_glue(linebox, properties->parfill_right_skip) : null, lmt_packaging_state.last_overshoot ); } } else { /*tex Here we can have a right skip way to the right due to an overshoot! */ lmt_linebreak_state.just_box = tex_hpack(q, cur_width, properties->adjust_spacing ? packing_linebreak : packing_exactly, (singleword) properties->paragraph_dir, holding_none_option); // attach_attribute_list_copy(linebreak_state.just_box, properties->initial_par); if (normalize_line_mode_permitted(normalize_line_mode_par, flatten_h_leaders_mode)) { tex_flatten_leaders(lmt_linebreak_state.just_box, NULL); } box_shift_amount(lmt_linebreak_state.just_box) = cur_indent; } /*tex Call the packaging subroutine, setting |just_box| to the justified box. */ node_subtype(lmt_linebreak_state.just_box) = line_list; if (callback_id) { tex_aux_line_show_break_node(callback_id); } /*tex Pending content (callback). */ if (node_next(contribute_head)) { if (! lmt_page_builder_state.output_active) { lmt_append_line_filter_callback(pre_box_append_line_context, 0); } } /* Pre-adjust content (no callback). */ if (pre_adjust_head != lmt_packaging_state.pre_adjust_tail) { tex_inject_adjust_list(pre_adjust_head, 1, lmt_linebreak_state.just_box, properties); } lmt_packaging_state.pre_adjust_tail = null; /* Pre-migrate content (callback). */ if (pre_migrate_head != lmt_packaging_state.pre_migrate_tail) { tex_append_list(pre_migrate_head, lmt_packaging_state.pre_migrate_tail); if (! lmt_page_builder_state.output_active) { lmt_append_line_filter_callback(pre_migrate_append_line_context, 0); } } lmt_packaging_state.pre_migrate_tail = null; /* Line content (callback). */ tex_append_to_vlist(lmt_linebreak_state.just_box, lua_key_index(post_linebreak), properties); if (! lmt_page_builder_state.output_active) { /* Here we could use the par specific baselineskip and lineskip. */ lmt_append_line_filter_callback(box_append_line_context, 0); } /* Post-migrate content (callback). */ if (post_migrate_head != lmt_packaging_state.post_migrate_tail) { tex_append_list(post_migrate_head, lmt_packaging_state.post_migrate_tail); if (! lmt_page_builder_state.output_active) { lmt_append_line_filter_callback(post_migrate_append_line_context, 0); } } lmt_packaging_state.post_migrate_tail = null; /* Post-adjust content (callback). */ if (post_adjust_head != lmt_packaging_state.post_adjust_tail) { tex_inject_adjust_list(post_adjust_head, 1, null, properties); } lmt_packaging_state.post_adjust_tail = null; /*tex Append the new box to the current vertical list, followed by the list of special nodes taken out of the box by the packager. Append a penalty node, if a nonzero penalty is appropriate. Penalties between the lines of a paragraph come from club and widow lines, from the |inter_line_penalty| parameter, and from lines that end at discretionary breaks. Breaking between lines of a two-line paragraph gets both club-line and widow-line penalties. The local variable |pen| will be set to the sum of all relevant penalties for the current line, except that the final line is never penalized. */ if (cur_line + 1 != lmt_linebreak_state.best_line) { /*tex When we end up here we hale multiple lines so we need to add penalties between them according to (several) specifications. */ halfword pen = 0; halfword spm = properties->shaping_penalties_mode; if (! spm) { shaping = 0; } if (tracing_penalties_par > 0) { tex_begin_diagnostic(); tex_print_format("[linebreak: penalty, line %i, best line %i, prevgraf %i, mode %x (i=%i c=%i w=%i b=%i)]", cur_line, lmt_linebreak_state.best_line, cur_list.prev_graf, spm, is_shaping_penalties_mode(spm, inter_line_penalty_shaping), is_shaping_penalties_mode(spm, club_penalty_shaping), is_shaping_penalties_mode(spm, widow_penalty_shaping), is_shaping_penalties_mode(spm, broken_penalty_shaping) ); tex_end_diagnostic(); } if (! (shaping && is_shaping_penalties_mode(spm, inter_line_penalty_shaping))) { halfword p; q = properties->inter_line_penalties; if (q) { r = cur_line; if (r > specification_count(q)) { r = specification_count(q); } else if (r < 1) { r = 1; } p = tex_get_specification_penalty(q, r); } else if (passive_pen_inter(cur_p)) { p = passive_pen_inter(cur_p); } else { p = properties->inter_line_penalty; } if (p) { pen += p; tex_aux_trace_penalty("interline", cur_line, r, p, pen); } } if (! (shaping && is_shaping_penalties_mode(spm, club_penalty_shaping))) { halfword p; q = properties->club_penalties; if (q) { /*tex prevgraf */ r = cur_line - cur_list.prev_graf; if (r > specification_count(q)) { r = specification_count(q); } else if (r < 1) { r = 1; } p = tex_get_specification_penalty(q, r); } else if (cur_line == cur_list.prev_graf + 1) { /*tex prevgraf */ p = properties->club_penalty; } else { p = 0; } if (p) { pen += p; tex_aux_trace_penalty("club", cur_line, r, p, pen); } } if (! (shaping && is_shaping_penalties_mode(spm, widow_penalty_shaping))) { halfword p; q = properties->display_math ? properties->display_widow_penalties : properties->widow_penalties; if (q) { r = lmt_linebreak_state.best_line - cur_line - 1; if (r > specification_count(q)) { r = specification_count(q); } else if (r < 1) { r = 1; } p = tex_get_specification_penalty(q, r); } else if (cur_line + 2 == lmt_linebreak_state.best_line) { p = properties->display_math ? properties->display_widow_penalty : properties->widow_penalty; } else { p = 0; } if (p) { pen += p; tex_aux_trace_penalty("widow", cur_line, r, p, pen); } } if (disc_break && ! (shaping && is_shaping_penalties_mode(spm, broken_penalty_shaping))) { halfword p; if (passive_pen_broken(cur_p) != 0) { p = passive_pen_broken(cur_p); } else { p = properties->broken_penalty; } if (p) { pen += p; tex_aux_trace_penalty("broken", cur_line, 0, p, pen); } } if (shaping && ! pen) { pen = properties->shaping_penalty; if (pen) { tex_aux_trace_penalty("shaping", cur_line, 0, pen, pen); } } if (pen) { r = tex_new_penalty_node(pen, linebreak_penalty_subtype); tex_couple_nodes(cur_list.tail, r); cur_list.tail = r; } } else { // if (tracing_penalties_par > 0) { // tex_begin_diagnostic(); // tex_print_format("[linebreak: no penalties injected]"); // tex_end_diagnostic(); // } } /*tex Append a penalty node, if a nonzero penalty is appropriate. Justify the line ending at breakpoint |cur_p|, and append it to the current vertical list, together with associated penalties and other insertions. */ ++cur_line; cur_p = passive_next_break(cur_p); if (cur_p && ! post_disc_break) { /*tex Prune unwanted nodes at the beginning of the next line. Glue and penalty and kern and math nodes are deleted at the beginning of a line, except in the anomalous case that the node to be deleted is actually one of the chosen breakpoints. Otherwise the pruning done here is designed to match the lookahead computation in |try_break|, where the |break_width| values are computed for non-discretionary breakpoints. */ r = temp_head; /*tex Normally we have a matching math open and math close node but when we cross a line the open node is removed, including any glue or penalties following it. This is however not that nice for callbacks that rely on symmetry. Of course this only counts for one liners, as we can still have only a begin or end node on a line. The end_of_math lua helper is made robust against this although there you should be aware of the fact that one can end up in the middle of math in callbacks that don't work on whole paragraphs, but at least this branch makes sure that some proper analysis is possible. (todo: check if math glyphs have the subtype marked done). */ /*tex Suboptimal but not critical. Todo.*/ while (1) { q = node_next(r); if (node_type(q) == math_node) { if (node_subtype(q) == begin_inline_math) { /*tex We keep it for tracing. */ break; } /*tex begin mathskip code */ math_surround(q) = 0 ; tex_reset_math_glue_to_zero(q); /*tex end mathskip code */ } if (q == passive_cur_break(cur_p)) { break; } else if (node_type(q) == glyph_node) { break; } else if (node_type(q) == glue_node && (node_subtype(q) == par_fill_left_skip_glue || node_subtype(q) == par_init_left_skip_glue)) { /*tex Keep it. Can be tricky after a |\break| with no follow up (loops). */ break; } else if (node_type(q) == par_node && node_subtype(q) == local_box_par_subtype) { /*tex Weird, in the middle somewhere .. these local penalties do this. */ break; /* if not we leak, so maybe this needs more testing */ } else if (non_discardable(q)) { break; } else if (node_type(q) == kern_node && node_subtype(q) != explicit_kern_subtype && node_subtype(q) != italic_kern_subtype) { break; } r = q; } if (r != temp_head) { node_next(r) = null; tex_flush_node_list(node_next(temp_head)); tex_try_couple_nodes(temp_head, q); } } if (cur_disc) { tex_try_couple_nodes(node_prev(cur_disc),node_next(cur_disc)); tex_flush_node(cur_disc); } /* We can clean up the par nodes. */ } while (cur_p); if ((cur_line != lmt_linebreak_state.best_line) || (node_next(temp_head))) { tex_confusion("line breaking"); } /*tex |prevgraf| etc */ cur_list.prev_graf = lmt_linebreak_state.best_line - 1; cur_list.direction_stack = lmt_linebreak_state.dir_ptr; lmt_linebreak_state.dir_ptr = null; } halfword tex_wipe_margin_kerns(halfword head) { /*tex We assume that head is a temp node or at least can be skipped (for now). */ halfword tail = head; while (1) { halfword next = node_next(tail); if (next) { if (node_type(next) == kern_node && (node_subtype(next) == left_margin_kern_subtype || node_subtype(next) == right_margin_kern_subtype)) { tex_try_couple_nodes(tail, node_next(next)); tex_flush_node(next); } else { tail = next; } } else { return tail; } } }