% This program is copyright (C) 1982 by D. E. Knuth; all rights are reserved. % Copying of this file is authorized only if (1) you are D. E. Knuth, or if % (2) you make absolutely no changes to your copy. (The WEB system provides % for alterations via an auxiliary file; the master file should stay intact.) % See Appendix H of the WEB manual for hints on how to install this program. % And see Appendix A of the TRIP manual for details about how to validate it. % TeX is a trademark of the American Mathematical Society. % METAFONT is a trademark of Addison-Wesley Publishing Company. % Version 0 was released in September 1982 after it passed a variety of tests. % Version 1 was released in November 1983 after thorough testing. % Version 1.1 fixed ``disappearing font identifiers'' et alia (July 1984). % Version 1.2 allowed `0' in response to an error, et alia (October 1984). % Version 1.3 made memory allocation more flexible and local (November 1984). % Version 1.4 fixed accents right after line breaks, et alia (April 1985). % Version 1.5 fixed \the\toks after other expansion in \edefs (August 1985). % Version 2.0 (almost identical to 1.5) corresponds to "Volume B" (April 1986). % Version 2.1 corrected anomalies in discretionary breaks (January 1987). % Version 2.2 corrected "(Please type...)" with null \endlinechar (April 1987). % Version 2.3 avoided incomplete page in premature termination (August 1987). % Version 2.4 fixed \noaligned rules in indented displays (August 1987). % Version 2.5 saved cur_order when expanding tokens (September 1987). % Version 2.6 added 10sp slop when shipping leaders (November 1987). % Version 2.7 improved rounding of negative-width characters (November 1987). % Version 2.8 fixed weird bug if no \patterns are used (December 1987). % Version 2.9 made \csname\endcsname's "relax" local (December 1987). % Version 2.91 fixed \outer\def\a0{}\a\a bug (April 1988). % Version 2.92 fixed \patterns, also file names with complex macros (May 1988). % Version 2.93 fixed negative halving in allocator when mem_min<0 (June 1988). % Version 2.94 kept open_log_file from calling fatal_error (November 1988). % Version 2.95 solved that problem a better way (December 1988). % Version 2.96 corrected bug in "Infinite shrinkage" recovery (January 1989). % Version 2.97 corrected blunder in creating 2.95 (February 1989). % Version 2.98 omitted save_for_after at outer level (March 1989). % Version 2.99 caught $$\begingroup\halign..$$ (June 1989). % Version 2.991 caught .5\ifdim.6... (June 1989). % Version 2.992 introduced major changes for 8-bit extensions (September 1989). % Version 2.993 fixed a save_stack synchronization bug et alia (December 1989). % Version 3.0 fixed unusual displays; was more \output robust (March 1990). % Version 3.1 fixed nullfont, disabled \write{\the\prevgraf} (September 1990). % Version 3.14 fixed unprintable font names and corrected typos (March 1991). % Version 3.141 more of same; reconstituted ligatures better (March 1992). % Version 3.1415 preserved nonexplicit kerns, tidied up (February 1993). % Version 3.14159 allowed fontmemsize to change; bulletproofing (March 1995). % Version 3.141592 fixed \xleaders, glueset, weird alignments (December 2002). % Version 3.1415926 was a general cleanup with minor fixes (February 2008). % Version 3.14159265 was similar (January 2014). % A reward of $327.68 will be paid to the first finder of any remaining bug. % Although considerable effort has been expended to make the TeX program % correct and reliable, no warranty is implied; the author disclaims any % obligation or liability for damages, including but not limited to % special, indirect, or consequential damages arising out of or in % connection with the use or performance of this software. This work has % been a ``labor of love'' and the author hopes that users enjoy it. % Here is TeX material that gets inserted after \input webmac \def\hang{\hangindent 3em\noindent\ignorespaces} \def\hangg#1 {\hang\hbox{#1 }} \def\textindent#1{\hangindent2.5em\noindent\hbox to2.5em{\hss#1 }\ignorespaces} \font\ninerm=cmr9 \let\mc=\ninerm % medium caps for names like SAIL \def\PASCAL{Pascal} \def\ph{\hbox{Pascal-H}} \def\pct!{{\char`\%}} % percent sign in ordinary text \font\logo=logo10 % font used for the METAFONT logo \def\MF{{\logo META}\-{\logo FONT}} \def\<#1>{$\langle#1\rangle$} \def\section{\mathhexbox278} \def\(#1){} % this is used to make section names sort themselves better \def\9#1{} % this is used for sort keys in the index via @@:sort key}{entry@@> \let\?=\relax % we want to be able to \write a \? \def\title{\TeX82} \def\topofcontents{\hsize 5.5in \vglue 0pt plus 1fil minus 1.5in \def\?##1]{\hbox to 1in{\hfil##1.\ }} } \def\botofcontents{\vskip 0pt plus 1fil minus 1.5in} \pageno=3 \def\glob{13} % this should be the section number of "" \def\gglob{20, 26} % this should be the next two sections of "" \def\.#1{\leavevmode\hbox{\tentex % typewriter type for strings \let\\=\BS % backslash in a string \let\'=\RQ % right quote in a string \let\`=\LQ % left quote in a string \let\{=\LB % left brace in a string \let\}=\RB % right brace in a string \let\~=\TL % tilde in a string \let\ =\SP % space in a string \let\_=\UL % underline in a string \let\&=\AM % ampersand in a string #1\kern.05em}} \def\^{\ifmmode\mathchar"222 \else\char`^ \fi} % pointer or hat \def\LQ{{\tt\char'22}} % left quote in a string \def\RQ{{\tt\char'23}} % right quote in a string \def\dotdot{\mathrel{.\,.}} % double dot, used only in math mode @s dotdot TeX @* Introduction. This is \TeX, a document compiler intended to produce typesetting of high quality. The \PASCAL\ program that follows is the definition of \TeX82, a standard @:PASCAL}{\PASCAL@> @!@:TeX82}{\TeX82@> version of \TeX\ that is designed to be highly portable so that identical output will be obtainable on a great variety of computers. The main purpose of the following program is to explain the algorithms of \TeX\ as clearly as possible. As a result, the program will not necessarily be very efficient when a particular \PASCAL\ compiler has translated it into a particular machine language. However, the program has been written so that it can be tuned to run efficiently in a wide variety of operating environments by making comparatively few changes. Such flexibility is possible because the documentation that follows is written in the \.{WEB} language, which is at a higher level than \PASCAL; the preprocessing step that converts \.{WEB} to \PASCAL\ is able to introduce most of the necessary refinements. Semi-automatic translation to other languages is also feasible, because the program below does not make extensive use of features that are peculiar to \PASCAL. A large piece of software like \TeX\ has inherent complexity that cannot be reduced below a certain level of difficulty, although each individual part is fairly simple by itself. The \.{WEB} language is intended to make the algorithms as readable as possible, by reflecting the way the individual program pieces fit together and by providing the cross-references that connect different parts. Detailed comments about what is going on, and about why things were done in certain ways, have been liberally sprinkled throughout the program. These comments explain features of the implementation, but they rarely attempt to explain the \TeX\ language itself, since the reader is supposed to be familiar with {\sl The \TeX book}. @.WEB@> @:TeXbook}{\sl The \TeX book@> @ The present implementation has a long ancestry, beginning in the summer of~1977, when Michael~F. Plass and Frank~M. Liang designed and coded a prototype @^Plass, Michael Frederick@> @^Liang, Franklin Mark@> @^Knuth, Donald Ervin@> based on some specifications that the author had made in May of that year. This original proto\TeX\ included macro definitions and elementary manipulations on boxes and glue, but it did not have line-breaking, page-breaking, mathematical formulas, alignment routines, error recovery, or the present semantic nest; furthermore, it used character lists instead of token lists, so that a control sequence like \.{\\halign} was represented by a list of seven characters. A complete version of \TeX\ was designed and coded by the author in late 1977 and early 1978; that program, like its prototype, was written in the {\mc SAIL} language, for which an excellent debugging system was available. Preliminary plans to convert the {\mc SAIL} code into a form somewhat like the present ``web'' were developed by Luis Trabb~Pardo and @^Trabb Pardo, Luis Isidoro@> the author at the beginning of 1979, and a complete implementation was created by Ignacio~A. Zabala in 1979 and 1980. The \TeX82 program, which @^Zabala Salelles, Ignacio Andr\'es@> was written by the author during the latter part of 1981 and the early part of 1982, also incorporates ideas from the 1979 implementation of @^Guibas, Leonidas Ioannis@> @^Sedgewick, Robert@> @^Wyatt, Douglas Kirk@> \TeX\ in {\mc MESA} that was written by Leonidas Guibas, Robert Sedgewick, and Douglas Wyatt at the Xerox Palo Alto Research Center. Several hundred refinements were introduced into \TeX82 based on the experiences gained with the original implementations, so that essentially every part of the system has been substantially improved. After the appearance of ``Version 0'' in September 1982, this program benefited greatly from the comments of many other people, notably David~R. Fuchs and Howard~W. Trickey. A final revision in September 1989 extended the input character set to eight-bit codes and introduced the ability to hyphenate words from different languages, based on some ideas of Michael~J. Ferguson. @^Fuchs, David Raymond@> @^Trickey, Howard Wellington@> @^Ferguson, Michael John@> No doubt there still is plenty of room for improvement, but the author is firmly committed to keeping \TeX82 ``frozen'' from now on; stability and reliability are to be its main virtues. On the other hand, the \.{WEB} description can be extended without changing the core of \TeX82 itself, and the program has been designed so that such extensions are not extremely difficult to make. The |banner| string defined here should be changed whenever \TeX\ undergoes any modifications, so that it will be clear which version of \TeX\ might be the guilty party when a problem arises. @^extensions to \TeX@> @^system dependencies@> If this program is changed, the resulting system should not be called `\TeX'; the official name `\TeX' by itself is reserved for software systems that are fully compatible with each other. A special test suite called the ``\.{TRIP} test'' is available for helping to determine whether a particular implementation deserves to be known as `\TeX' [cf.~Stanford Computer Science report CS1027, November 1984]. @d banner "This is TeX, Version 3.14159265 (HINT)" /*printed when \TeX\ starts*/ @ Different \PASCAL s have slightly different conventions, and the present @!@:PASCAL H}{\ph@> program expresses \TeX\ in terms of the \PASCAL\ that was available to the author in 1982. Constructions that apply to this particular compiler, which we shall call \ph, should help the reader see how to make an appropriate interface for other systems if necessary. (\ph\ is Charles Hedrick's modification of a compiler @^Hedrick, Charles Locke@> for the DECsystem-10 that was originally developed at the University of Hamburg; cf.\ {\sl SOFTWARE---Practice \AM\ Experience \bf6} (1976), 29--42. The \TeX\ program below is intended to be adaptable, without extensive changes, to most other versions of \PASCAL, so it does not fully use the admirable features of \ph. Indeed, a conscious effort has been made here to avoid using several idiosyncratic features of standard \PASCAL\ itself, so that most of the code can be translated mechanically into other high-level languages. For example, the `\&{with}' and `\\{new}' features are not used, nor are pointer types, set types, or enumerated scalar types; there are no `\&{var}' parameters, except in the case of files; there are no tag fields on variant records; there are no assignments |double=int|; no procedures are declared local to other procedures.) The portions of this program that involve system-dependent code, where changes might be necessary because of differences between \PASCAL\ compilers and/or differences between operating systems, can be identified by looking at the sections whose numbers are listed under `system dependencies' in the index. Furthermore, the index entries for `dirty \PASCAL' list all places where the restrictions of \PASCAL\ have not been followed perfectly, for one reason or another. @!@^system dependencies@> @!@^dirty \PASCAL@> Incidentally, \PASCAL's standard |round| function can be problematical, because it disagrees with the IEEE floating-point standard. Many implementors have therefore chosen to substitute their own home-grown rounding procedure. @ The program begins with a normal \PASCAL\ program heading, whose components will be filled in later, using the conventions of \.{WEB}. @.WEB@> For example, the portion of the program called `\X\glob:Global variables\X' below will be replaced by a sequence of variable declarations that starts in $\section\glob$ of this documentation. In this way, we are able to define each individual global variable when we are prepared to understand what it means; we do not have to define all of the globals at once. Cross references in $\section\glob$, where it says ``See also sections \gglob, \dots,'' also make it possible to look at the set of all global variables, if desired. Similar remarks apply to the other portions of the program heading. Actually the heading shown here is not quite normal: The || line does not mention any |output| file, because \ph\ would ask the \TeX\ user to specify a file name if |output| were specified here. @:PASCAL H}{\ph@> @^system dependencies@> @f type true /*but `|type|' will not be treated as a reserved word*/ @p@t\4@>@@; /*all file names are defined dynamically*/ @@; @@; @@; @@; @# void initialize(void) /*this procedure gets things started properly*/ {@+@@; @; } @# @t\4@>@@; @t\4@>@@; @ The overall \TeX\ program begins with the heading just shown, after which comes a bunch of procedure declarations and function declarations. Finally we will get to the main program, which begins with the comment `|start_here|'. If you want to skip down to the main program now, you can look up `|start_here|' in the index. But the author suggests that the best way to understand this program is to follow pretty much the order of \TeX's components as they appear in the \.{WEB} description you are now reading, since the present ordering is intended to combine the advantages of the ``bottom up'' and ``top down'' approaches to the problem of understanding a somewhat complicated system. @ Three labels must be declared in the main program, so we give them symbolic names. @= @t\hskip-2pt@>@t\hskip-2pt@>@, /*key control points*/ @ Some of the code below is intended to be used only when diagnosing the strange behavior that sometimes occurs when \TeX\ is being installed or when system wizards are fooling around with \TeX\ without quite knowing what they are doing. Such code will not normally be compiled; it is delimited by the codewords `$|debug|\ldots|debug|$', with apologies to people who wish to preserve the purity of English. Similarly, there is some conditional code delimited by `$|stat|\ldots|tats|$' that is intended for use when statistics are to be kept about \TeX's memory usage. The |stat| $\ldots$ |tats| code also implements diagnostic information for \.{\\tracingparagraphs} and \.{\\tracingpages}. @^debugging@> @ This program has two important variations: (1) There is a long and slow version called \.{INITEX}, which does the extra calculations needed to @.INITEX@> initialize \TeX's internal tables; and (2)~there is a shorter and faster production version, which cuts the initialization to a bare minimum. Parts of the program that are needed in (1) but not in (2) are delimited by the codewords `$|init|\ldots|tini|$'. @= @@; #ifdef @!INIT @@; #endif @ If the first character of a \PASCAL\ comment is a dollar sign, \ph\ treats the comment as a list of ``compiler directives'' that will affect the translation of this program into machine language. The directives shown below specify full checking and inclusion of the \PASCAL\ debugger when \TeX\ is being debugged, but they cause range checking and other redundant code to be eliminated when the production system is being generated. Arithmetic overflow will be detected in all cases. @:PASCAL H}{\ph@> @^system dependencies@> @^overflow in arithmetic@> @s int8_t int @s uint8_t int @s uint16_t int @s halfword int @s in TeX @s line normal @s to do @= #include #include #include #include #include #include #define odd(X) ((X)&1) #define chr(X) ((unsigned char)(X)) #define ord(X) ((int)(X)) #if __SIZEOF_FLOAT__==4 typedef float float32_t; #else #error float type must have size 4 #endif @h @ This \TeX\ implementation conforms to the rules of the {\sl Pascal User @:PASCAL}{\PASCAL@> @^system dependencies@> Manual} published by Jensen and Wirth in 1975, except where system-dependent @^Wirth, Niklaus@> @^Jensen, Kathleen@> code is necessary to make a useful system program, and except in another respect where such conformity would unnecessarily obscure the meaning and clutter up the code: We assume that |case| statements may include a default case that applies if no matching label is found. Thus, we shall use constructions like $$\vbox{\halign{\ignorespaces#\hfil\cr |case x|\cr 1: $\langle\,$code for $x=1\,\rangle$;\cr 3: $\langle\,$code for $x=3\,\rangle$;\cr |default:| $\langle\,$code for |x!=1| and |x!=3|$\,\rangle$\cr |} |\cr}}$$ since most \PASCAL\ compilers have plugged this hole in the language by incorporating some sort of default mechanism. For example, the \ph\ compiler allows `|default:|:' as a default label, and other \PASCAL s allow syntaxes like `\&{else}' or `\&{otherwise}' or `\\{otherwise}:', etc. The definitions of |default:| and |} | should be changed to agree with local conventions. Note that no semicolon appears before |} | in this program, so the definition of |} | should include a semicolon if the compiler wants one. (Of course, if no default mechanism is available, the |case| statements of \TeX\ will have to be laboriously extended by listing all remaining cases. People who are stuck with such \PASCAL s have, in fact, done this, successfully but not happily!) @:PASCAL H}{\ph@> @ The following parameters can be changed at compile time to extend or reduce \TeX's capacity. They may have different values in \.{INITEX} and in production versions of \TeX. @.INITEX@> @^system dependencies@> @= enum {@+@!mem_max=30000@+}; /*greatest index in \TeX's internal |mem| array; must be strictly less than |max_halfword|; must be equal to |mem_top| in \.{INITEX}, otherwise | >= mem_top|*/ enum {@+@!mem_min=0@+}; /*smallest index in \TeX's internal |mem| array; must be |min_halfword| or more; must be equal to |mem_bot| in \.{INITEX}, otherwise | <= mem_bot|*/ enum {@+@!buf_size=500@+}; /*maximum number of characters simultaneously present in current lines of open files and in control sequences between \.{\\csname} and \.{\\endcsname}; must not exceed |max_halfword|*/ enum {@+@!error_line=72@+}; /*width of context lines on terminal error messages*/ enum {@+@!half_error_line=42@+}; /*width of first lines of contexts in terminal error messages; should be between 30 and |error_line-15|*/ enum {@+@!max_print_line=79@+}; /*width of longest text lines output; should be at least 60*/ enum {@+@!stack_size=200@+}; /*maximum number of simultaneous input sources*/ enum {@+@!max_in_open=6@+}; /*maximum number of input files and error insertions that can be going on simultaneously*/ enum {@+@!font_max=75@+}; /*maximum internal font number; must not exceed |max_quarterword| and must be at most |font_base+256|*/ enum {@+@!font_mem_size=20000@+}; /*number of words of |font_info| for all fonts*/ enum {@+@!param_size=60@+}; /*maximum number of simultaneous macro parameters*/ enum {@+@!nest_size=40@+}; /*maximum number of semantic levels simultaneously active*/ enum {@+@!max_strings=3000@+}; /*maximum number of strings; must not exceed |max_halfword|*/ enum {@+@!string_vacancies=8000@+}; /*the minimum number of characters that should be available for the user's control sequences and font names, after \TeX's own error messages are stored*/ enum {@+@!pool_size=32000@+}; /*maximum number of characters in strings, including all error messages and help texts, and the names of all fonts and control sequences; must exceed |string_vacancies| by the total length of \TeX's own strings, which is currently about 23000*/ enum {@+@!save_size=600@+}; /*space for saving values outside of current group; must be at most |max_halfword|*/ enum {@+@!trie_size=8000@+}; /*space for hyphenation patterns; should be larger for \.{INITEX} than it is in production versions of \TeX*/ enum {@+@!trie_op_size=500@+}; /*space for ``opcodes'' in the hyphenation patterns*/ enum {@+@!dvi_buf_size=800@+}; /*size of the output buffer; must be a multiple of 8*/ enum {@+@!file_name_size=40@+}; /*file names shouldn't be longer than this*/ const char *@!pool_name="TeXformats:TEX.POOL "; /*string of length |file_name_size|; tells where the string pool appears*/ @.TeXformats@> @ Like the preceding parameters, the following quantities can be changed at compile time to extend or reduce \TeX's capacity. But if they are changed, it is necessary to rerun the initialization program \.{INITEX} @.INITEX@> to generate new tables for the production \TeX\ program. One can't simply make helter-skelter changes to the following constants, since certain rather complex initialization numbers are computed from them. They are defined here using \.{WEB} macros, instead of being put into \PASCAL's || list, in order to emphasize this distinction. @d mem_bot 0 /*smallest index in the |mem| array dumped by \.{INITEX}; must not be less than |mem_min|*/ @d mem_top 30000 /*largest index in the |mem| array dumped by \.{INITEX}; must be substantially larger than |mem_bot| and not greater than |mem_max|*/ @d font_base 0 /*smallest internal font number; must not be less than |min_quarterword|*/ @d hash_size 2100 /*maximum number of control sequences; it should be at most about |(mem_max-mem_min)/(double)10|*/ @d hash_prime 1777 /*a prime number equal to about 85\pct! of |hash_size|*/ @d hyph_size 307 /*another prime; the number of \.{\\hyphenation} exceptions*/ @^system dependencies@> @ In case somebody has inadvertently made bad settings of the ``constants,'' \TeX\ checks them using a global variable called |bad|. This is the first of many sections of \TeX\ where global variables are defined. @= int @!bad; /*is some ``constant'' wrong?*/ @ Later on we will say `\ignorespaces|if (mem_max >= max_halfword) bad=14|', or something similar. (We can't do that until |max_halfword| has been defined.) @= bad=0; if ((half_error_line < 30)||(half_error_line > error_line-15)) bad=1; if (max_print_line < 60) bad=2; if (dvi_buf_size%8!=0) bad=3; if (mem_bot+1100 > mem_top) bad=4; if (hash_prime > hash_size) bad=5; if (max_in_open >= 128) bad=6; if (mem_top < 256+11) bad=7; /*we will want |null_list > 255|*/ @ Labels are given symbolic names by the following definitions, so that occasional |goto| statements will be meaningful. We insert the label `|end|' just before the `\ignorespaces|} |\unskip' of a procedure in which we have used the `|goto end|' statement defined below; the label `|restart|' is occasionally used at the very beginning of a procedure; and the label `|reswitch|' is occasionally used just prior to a |case| statement in which some cases change the conditions and we wish to branch to the newly applicable case. Loops that are set up with the |loop| construction defined below are commonly exited by going to `|done|' or to `|found|' or to `|not_found|', and they are sometimes repeated by going to `|resume|'. If two or more parts of a subroutine start differently but end up the same, the shared code may be gathered together at `|common_ending|'. Incidentally, this program never declares a label that isn't actually used, because some fussy \PASCAL\ compilers will complain about redundant labels. @d done6 36 /*for exiting the sixth loop in a block*/ @ Here are some macros for common programming idioms. @d incr(X) X=X+1 /*increase a variable by unity*/ @d decr(X) X=X-1 /*decrease a variable by unity*/ @d negate(X) X=-X /*change the sign of a variable*/ @d loop @+while (true) @+ /*repeat over and over until a |goto| happens*/ @f loop else /*\.{WEB}'s |else| acts like `\ignorespaces|while true do|\unskip'*/ @d do_nothing /*empty statement*/ @d empty 0 /*symbolic name for a null constant*/ @* The character set. In order to make \TeX\ readily portable to a wide variety of computers, all of its input text is converted to an internal eight-bit code that includes standard ASCII, the ``American Standard Code for Information Interchange.'' This conversion is done immediately when each character is read in. Conversely, characters are converted from ASCII to the user's external representation just before they are output to a text file. Such an internal code is relevant to users of \TeX\ primarily because it governs the positions of characters in the fonts. For example, the character `\.A' has ASCII code $65=0101$, and when \TeX\ typesets this letter it specifies character number 65 in the current font. If that font actually has `\.A' in a different position, \TeX\ doesn't know what the real position is; the program that does the actual printing from \TeX's device-independent files is responsible for converting from ASCII to a particular font encoding. @^ASCII code@> \TeX's internal code also defines the value of constants that begin with a reverse apostrophe; and it provides an index to the \.{\\catcode}, \.{\\mathcode}, \.{\\uccode}, \.{\\lccode}, and \.{\\delcode} tables. @ Characters of text that have been converted to \TeX's internal form are said to be of type |ASCII_code|, which is a subrange of the integers. @= typedef uint8_t ASCII_code; /*eight-bit numbers*/ @ The original \PASCAL\ compiler was designed in the late 60s, when six-bit character sets were common, so it did not make provision for lowercase letters. Nowadays, of course, we need to deal with both capital and small letters in a convenient way, especially in a program for typesetting; so the present specification of \TeX\ has been written under the assumption that the \PASCAL\ compiler and run-time system permit the use of text files with more than 64 distinguishable characters. More precisely, we assume that the character set contains at least the letters and symbols associated with ASCII codes 040 through 0176; all of these characters are now available on most computer terminals. Since we are dealing with more characters than were present in the first \PASCAL\ compilers, we have to decide what to call the associated data type. Some \PASCAL s use the original name |unsigned char| for the characters in text files, even though there now are more than 64 such characters, while other \PASCAL s consider |unsigned char| to be a 64-element subrange of a larger data type that has some other name. In order to accommodate this difference, we shall use the name |text_char| to stand for the data type of the characters that are converted to and from |ASCII_code| when they are input and output. We shall also assume that |text_char| consists of the elements |chr(first_text_char)| through |chr(last_text_char)|, inclusive. The following definitions should be adjusted if necessary. @^system dependencies@> @d text_char unsigned char /*the data type of characters in text files*/ @d first_text_char 0 /*ordinal number of the smallest element of |text_char|*/ @d last_text_char 255 /*ordinal number of the largest element of |text_char|*/ @= int @!i; @ The \TeX\ processor converts between ASCII code and the user's external character set by means of arrays |xord| and |xchr| that are analogous to \PASCAL's |ord| and |chr| functions. @= ASCII_code @!xord[256]; /*specifies conversion of input characters*/ uint8_t @!xchr[256]; /*specifies conversion of output characters*/ @ Since we are assuming that our \PASCAL\ system is able to read and write the visible characters of standard ASCII (although not necessarily using the ASCII codes to represent them), the following assignment statements initialize the standard part of the |xchr| array properly, without needing any system-dependent changes. On the other hand, it is possible to implement \TeX\ with less complete character sets, and in such cases it will be necessary to change something here. @^system dependencies@> @= xchr[040]= ' ' ; xchr[041]= '!' ; xchr[042]= '"' ; xchr[043]= '#' ; xchr[044]= '$' ; xchr[045]= '%' ; xchr[046]= '&' ; xchr[047]= '\'' ;@/ xchr[050]= '(' ; xchr[051]= ')' ; xchr[052]= '*' ; xchr[053]= '+' ; xchr[054]= ',' ; xchr[055]= '-' ; xchr[056]= '.' ; xchr[057]= '/' ;@/ xchr[060]= '0' ; xchr[061]= '1' ; xchr[062]= '2' ; xchr[063]= '3' ; xchr[064]= '4' ; xchr[065]= '5' ; xchr[066]= '6' ; xchr[067]= '7' ;@/ xchr[070]= '8' ; xchr[071]= '9' ; xchr[072]= ':' ; xchr[073]= ';' ; xchr[074]= '<' ; xchr[075]= '=' ; xchr[076]= '>' ; xchr[077]= '?' ;@/ xchr[0100]= '@@' ; xchr[0101]= 'A' ; xchr[0102]= 'B' ; xchr[0103]= 'C' ; xchr[0104]= 'D' ; xchr[0105]= 'E' ; xchr[0106]= 'F' ; xchr[0107]= 'G' ;@/ xchr[0110]= 'H' ; xchr[0111]= 'I' ; xchr[0112]= 'J' ; xchr[0113]= 'K' ; xchr[0114]= 'L' ; xchr[0115]= 'M' ; xchr[0116]= 'N' ; xchr[0117]= 'O' ;@/ xchr[0120]= 'P' ; xchr[0121]= 'Q' ; xchr[0122]= 'R' ; xchr[0123]= 'S' ; xchr[0124]= 'T' ; xchr[0125]= 'U' ; xchr[0126]= 'V' ; xchr[0127]= 'W' ;@/ xchr[0130]= 'X' ; xchr[0131]= 'Y' ; xchr[0132]= 'Z' ; xchr[0133]= '[' ; xchr[0134]= '\\' ; xchr[0135]= ']' ; xchr[0136]= '^' ; xchr[0137]= '_' ;@/ xchr[0140]= '`' ; xchr[0141]= 'a' ; xchr[0142]= 'b' ; xchr[0143]= 'c' ; xchr[0144]= 'd' ; xchr[0145]= 'e' ; xchr[0146]= 'f' ; xchr[0147]= 'g' ;@/ xchr[0150]= 'h' ; xchr[0151]= 'i' ; xchr[0152]= 'j' ; xchr[0153]= 'k' ; xchr[0154]= 'l' ; xchr[0155]= 'm' ; xchr[0156]= 'n' ; xchr[0157]= 'o' ;@/ xchr[0160]= 'p' ; xchr[0161]= 'q' ; xchr[0162]= 'r' ; xchr[0163]= 's' ; xchr[0164]= 't' ; xchr[0165]= 'u' ; xchr[0166]= 'v' ; xchr[0167]= 'w' ;@/ xchr[0170]= 'x' ; xchr[0171]= 'y' ; xchr[0172]= 'z' ; xchr[0173]= '{' ; xchr[0174]= '|' ; xchr[0175]= '}' ; xchr[0176]= '~' ;@/ @ Some of the ASCII codes without visible characters have been given symbolic names in this program because they are used with a special meaning. @d null_code 00 /*ASCII code that might disappear*/ @d carriage_return 015 /*ASCII code used at end of line*/ @d invalid_code 0177 /*ASCII code that many systems prohibit in text files*/ @ The ASCII code is ``standard'' only to a certain extent, since many computer installations have found it advantageous to have ready access to more than 94 printing characters. Appendix~C of {\sl The \TeX book\/} gives a complete specification of the intended correspondence between characters and \TeX's internal representation. @:TeXbook}{\sl The \TeX book@> If \TeX\ is being used on a garden-variety \PASCAL\ for which only standard ASCII codes will appear in the input and output files, it doesn't really matter what codes are specified in |xchr[0 dotdot 037]|, but the safest policy is to blank everything out by using the code shown below. However, other settings of |xchr| will make \TeX\ more friendly on computers that have an extended character set, so that users can type things like `\.^^Z' instead of `\.{\\ne}'. People with extended character sets can assign codes arbitrarily, giving an |xchr| equivalent to whatever characters the users of \TeX\ are allowed to have in their input files. It is best to make the codes correspond to the intended interpretations as shown in Appendix~C whenever possible; but this is not necessary. For example, in countries with an alphabet of more than 26 letters, it is usually best to map the additional letters into codes less than~040. To get the most ``permissive'' character set, change | ' ' | on the right of these assignment statements to |chr(i)|. @^character set dependencies@> @^system dependencies@> @= for (i=0; i<=037; i++) xchr[i]= ' ' ; for (i=0177; i<=0377; i++) xchr[i]= ' ' ; @ The following system-independent code makes the |xord| array contain a suitable inverse to the information in |xchr|. Note that if |xchr[i]==xchr[j]| where |i < j < 0177|, the value of |xord[xchr[i]]| will turn out to be |j| or more; hence, standard ASCII code numbers will be used instead of codes below 040 in case there is a coincidence. @= for (i=first_text_char; i<=last_text_char; i++) xord[chr(i)]=invalid_code; for (i=0200; i<=0377; i++) xord[xchr[i]]=i; for (i=0; i<=0176; i++) xord[xchr[i]]=i; @* Input and output. The bane of portability is the fact that different operating systems treat input and output quite differently, perhaps because computer scientists have not given sufficient attention to this problem. People have felt somehow that input and output are not part of ``real'' programming. Well, it is true that some kinds of programming are more fun than others. With existing input/output conventions being so diverse and so messy, the only sources of joy in such parts of the code are the rare occasions when one can find a way to make the program a little less bad than it might have been. We have two choices, either to attack I/O now and get it over with, or to postpone I/O until near the end. Neither prospect is very attractive, so let's get it over with. The basic operations we need to do are (1)~inputting and outputting of text, to or from a file or the user's terminal; (2)~inputting and outputting of eight-bit bytes, to or from a file; (3)~instructing the operating system to initiate (``open'') or to terminate (``close'') input or output from a specified file; (4)~testing whether the end of an input file has been reached. \TeX\ needs to deal with two kinds of files. We shall use the term |alpha_file| for a file that contains textual data, and the term |byte_file| for a file that contains eight-bit binary information. These two types turn out to be the same on many computers, but sometimes there is a significant distinction, so we shall be careful to distinguish between them. Standard protocols for transferring such files from computer to computer, via high-speed networks, are now becoming available to more and more communities of users. The program actually makes use also of a third kind of file, called a |word_file|, when dumping and reloading base information for its own initialization. We shall define a word file later; but it will be possible for us to specify simple operations on word files before they are defined. @= typedef uint8_t eight_bits; /*unsigned one-byte quantity*/ typedef struct {@+FILE *f;@+text_char@,d;@+} alpha_file; /*files that contain textual data*/ typedef struct {@+FILE *f;@+eight_bits@,d;@+} byte_file; /*files that contain binary data*/ @ Most of what we need to do with respect to input and output can be handled by the I/O facilities that are standard in \PASCAL, i.e., the routines called |get|, |put|, |eof|, and so on. But standard \PASCAL\ does not allow file variables to be associated with file names that are determined at run time, so it cannot be used to implement \TeX; some sort of extension to \PASCAL's ordinary |reset| and |rewrite| is crucial for our purposes. We shall assume that |name_of_file| is a variable of an appropriate type such that the \PASCAL\ run-time system being used to implement \TeX\ can open a file whose external name is specified by |name_of_file|. @^system dependencies@> @= uint8_t @!name_of_file0[file_name_size+1]={0}, *const @!name_of_file = @!name_of_file0-1;@;@/ /*on some systems this may be a \&{record} variable*/ uint8_t @!name_length;@/ /*this many characters are actually relevant in |name_of_file| (the rest are blank)*/ @ The \ph\ compiler with which the present version of \TeX\ was prepared has extended the rules of \PASCAL\ in a very convenient way. To open file~|f|, we can write $$\vbox{\halign{#\hfil\qquad&#\hfil\cr |reset(f,@t\\{name}@>,"/O")|&for input;\cr |rewrite(f,@t\\{name}@>,"/O")|&for output.\cr}}$$ The `\\{name}' parameter, which is of type `{\bf packed array $[\langle\\{any}\rangle]$ of \\{char}}', stands for the name of the external file that is being opened for input or output. Blank spaces that might appear in \\{name} are ignored. The `\.{/O}' parameter tells the operating system not to issue its own error messages if something goes wrong. If a file of the specified name cannot be found, or if such a file cannot be opened for some other reason (e.g., someone may already be trying to write the same file), we will have |@!erstat(f)!=0| after an unsuccessful |reset| or |rewrite|. This allows \TeX\ to undertake appropriate corrective action. @:PASCAL H}{\ph@> @^system dependencies@> \TeX's file-opening procedures return |false| if no file identified by |name_of_file| could be opened. @d reset_OK(X) erstat(X)==0 @d rewrite_OK(X) erstat(X)==0 @p bool a_open_in(alpha_file *f) /*open a text file for input*/ {@+reset((*f), name_of_file,"r");return reset_OK((*f)); } @# bool a_open_out(alpha_file *f) /*open a text file for output*/ {@+rewrite((*f), name_of_file,"w");return rewrite_OK((*f)); } @# bool b_open_in(byte_file *f) /*open a binary file for input*/ {@+reset((*f), name_of_file,"rb");return reset_OK((*f)); } @# bool b_open_out(byte_file *f) /*open a binary file for output*/ {@+rewrite((*f), name_of_file,"wb");return rewrite_OK((*f)); } @# bool w_open_in(word_file *f) /*open a word file for input*/ {@+reset((*f), name_of_file,"rb");return reset_OK((*f)); } @# bool w_open_out(word_file *f) /*open a word file for output*/ {@+rewrite((*f), name_of_file,"wb");return rewrite_OK((*f)); } @ Files can be closed with the \ph\ routine `|close(f)|', which @:PASCAL H}{\ph@> @^system dependencies@> should be used when all input or output with respect to |f| has been completed. This makes |f| available to be opened again, if desired; and if |f| was used for output, the |close| operation makes the corresponding external file appear on the user's area, ready to be read. These procedures should not generate error messages if a file is being closed before it has been successfully opened. @p void a_close(alpha_file *f) /*close a text file*/ {@+close((*f)); } @# void b_close(byte_file *f) /*close a binary file*/ {@+close((*f)); } @# void w_close(word_file *f) /*close a word file*/ {@+close((*f)); } @ Binary input and output are done with \PASCAL's ordinary |get| and |put| procedures, so we don't have to make any other special arrangements for binary~I/O. Text output is also easy to do with standard \PASCAL\ routines. The treatment of text input is more difficult, however, because of the necessary translation to |ASCII_code| values. \TeX's conventions should be efficient, and they should blend nicely with the user's operating environment. @ Input from text files is read one line at a time, using a routine called |input_ln|. This function is defined in terms of global variables called |buffer|, |first|, and |last| that will be described in detail later; for now, it suffices for us to know that |buffer| is an array of |ASCII_code| values, and that |first| and |last| are indices into this array representing the beginning and ending of a line of text. @= ASCII_code @!buffer[buf_size+1]; /*lines of characters being read*/ uint16_t @!first; /*the first unused position in |buffer|*/ uint16_t @!last; /*end of the line just input to |buffer|*/ uint16_t @!max_buf_stack; /*largest index used in |buffer|*/ @ The |input_ln| function brings the next line of input from the specified file into available positions of the buffer array and returns the value |true|, unless the file has already been entirely read, in which case it returns |false| and sets |last=first|. In general, the |ASCII_code| numbers that represent the next line of the file are input into |buffer[first]|, |buffer[first+1]|, \dots, |buffer[last-1]|; and the global variable |last| is set equal to |first| plus the length of the line. Trailing blanks are removed from the line; thus, either |last==first| (in which case the line was entirely blank) or |buffer[last-1]!=' '|. An overflow error is given, however, if the normal actions of |input_ln| would make |last >= buf_size|; this is done so that other parts of \TeX\ can safely look at the contents of |buffer[last+1]| without overstepping the bounds of the |buffer| array. Upon entry to |input_ln|, the condition |first < buf_size| will always hold, so that there is always room for an ``empty'' line. The variable |max_buf_stack|, which is used to keep track of how large the |buf_size| parameter must be to accommodate the present job, is also kept up to date by |input_ln|. If the |bypass_eoln| parameter is |true|, |input_ln| will do a |get| before looking at the first character of the line; this skips over an |eoln| that was in |f.d|. The procedure does not do a |get| when it reaches the end of the line; therefore it can be used to acquire input from the user's terminal as well as from ordinary text files. Standard \PASCAL\ says that a file should have |eoln| immediately before |eof|, but \TeX\ needs only a weaker restriction: If |eof| occurs in the middle of a line, the system function |eoln| should return a |true| result (even though |f.d| will be undefined). Since the inner loop of |input_ln| is part of \TeX's ``inner loop''---each character of input comes in at this place---it is wise to reduce system overhead by making use of special routines that read in an entire array of characters at once, if such routines are available. The following code uses standard \PASCAL\ to illustrate what needs to be done, but finer tuning is often possible at well-developed \PASCAL\ sites. @^inner loop@> @p bool input_ln(alpha_file *f, bool @!bypass_eoln) /*inputs the next line or returns |false|*/ {@+uint16_t last_nonblank; /*|last| with trailing blanks removed*/ if (bypass_eoln) if (!eof((*f))) get((*f)); /*input the first character of the line into |f.d|*/ last=first; /*cf.\ Matthew 19\thinspace:\thinspace30*/ if (eof((*f))) return false; else{@+last_nonblank=first; while (!eoln((*f))) {@+if (last >= max_buf_stack) {@+max_buf_stack=last+1; if (max_buf_stack==buf_size) @; } buffer[last]=xord[(*f).d];get((*f));incr(last); if (buffer[last-1]!=' ') last_nonblank=last; } last=last_nonblank;return true; } } @ The user's terminal acts essentially like other files of text, except that it is used both for input and for output. When the terminal is considered an input file, the file variable is called |term_in|, and when it is considered an output file the file variable is |term_out|. @^system dependencies@> @= alpha_file @!term_in; /*the terminal as an input file*/ alpha_file @!term_out; /*the terminal as an output file*/ @ Here is how to open the terminal files in \ph. The `\.{/I}' switch suppresses the first |get|. @:PASCAL H}{\ph@> @^system dependencies@> @d t_open_in term_in.f=stdin /*open the terminal for text input*/ @d t_open_out term_out.f=stdout /*open the terminal for text output*/ @ Sometimes it is necessary to synchronize the input/output mixture that happens on the user's terminal, and three system-dependent procedures are used for this purpose. The first of these, |update_terminal|, is called when we want to make sure that everything we have output to the terminal so far has actually left the computer's internal buffers and been sent. The second, |clear_terminal|, is called when we wish to cancel any input that the user may have typed ahead (since we are about to issue an unexpected error message). The third, |wake_up_terminal|, is supposed to revive the terminal if the user has disabled it by some instruction to the operating system. The following macros show how these operations can be specified in \ph: @:PASCAL H}{\ph@> @^system dependencies@> @d update_terminal fflush(term_out.f) /*empty the terminal output buffer*/ @d clear_terminal fflush(term_in.f) /*clear the terminal input buffer*/ @d wake_up_terminal do_nothing /*cancel the user's cancellation of output*/ @ We need a special routine to read the first line of \TeX\ input from the user's terminal. This line is different because it is read before we have opened the transcript file; there is sort of a ``chicken and egg'' problem here. If the user types `\.{\\input paper}' on the first line, or if some macro invoked by that line does such an \.{\\input}, the transcript file will be named `\.{paper.log}'; but if no \.{\\input} commands are performed during the first line of terminal input, the transcript file will acquire its default name `\.{texput.log}'. (The transcript file will not contain error messages generated by the first line before the first \.{\\input} command.) @.texput@> The first line is even more special if we are lucky enough to have an operating system that treats \TeX\ differently from a run-of-the-mill \PASCAL\ object program. It's nice to let the user start running a \TeX\ job by typing a command line like `\.{tex paper}'; in such a case, \TeX\ will operate as if the first line of input were `\.{paper}', i.e., the first line will consist of the remainder of the command line, after the part that invoked \TeX. The first line is special also because it may be read before \TeX\ has input a format file. In such cases, normal error messages cannot yet be given. The following code uses concepts that will be explained later. (If the \PASCAL\ compiler does not support non-local |@!goto|\unskip, the @^system dependencies@> statement `|goto exit(0)|' should be replaced by something that quietly terminates the program.) @= if (format_ident==0) {@+write_ln(term_out,"Buffer size exceeded!");exit(0); @.Buffer size exceeded@> } else{@+cur_input.loc_field=first;cur_input.limit_field=last-1; overflow("buffer size", buf_size); @:TeX capacity exceeded buffer size}{\quad buffer size@> } @ Different systems have different ways to get started. But regardless of what conventions are adopted, the routine that initializes the terminal should satisfy the following specifications: \yskip\textindent{1)}It should open file |term_in| for input from the terminal. (The file |term_out| will already be open for output to the terminal.) \textindent{2)}If the user has given a command line, this line should be considered the first line of terminal input. Otherwise the user should be prompted with `\.{**}', and the first line of input should be whatever is typed in response. \textindent{3)}The first line of input, which might or might not be a command line, should appear in locations |first| to |last-1| of the |buffer| array. \textindent{4)}The global variable |loc| should be set so that the character to be read next by \TeX\ is in |buffer[loc]|. This character should not be blank, and we should have |loc < last|. \yskip\noindent(It may be necessary to prompt the user several times before a non-blank line comes in. The prompt is `\.{**}' instead of the later `\.*' because the meaning is slightly different: `\.{\\input}' need not be typed immediately after~`\.{**}'.) @d loc cur_input.loc_field /*location of first unread character in |buffer|*/ @ The following program does the required initialization without retrieving a possible command line. It should be clear how to modify this routine to deal with command lines, if the system permits them. @^system dependencies@> @p bool init_terminal(void) /*gets the terminal input started*/ {@+ t_open_in; loop@+{@+wake_up_terminal;write(term_out,"**");update_terminal; @.**@> if (!input_ln(&term_in, true)) /*this shouldn't happen*/ {@+write_ln(term_out); write(term_out,"! End of file on the terminal... why?"); @.End of file on the terminal@> return false; } loc=first; while ((loc < last)&&(buffer[loc]==' ')) incr(loc); if (loc < last) {@+return true; /*return unless the line was all blank*/ } write_ln(term_out,"Please type the name of your input file."); } } @* String handling. Control sequence names and diagnostic messages are variable-length strings of eight-bit characters. Since \PASCAL\ does not have a well-developed string mechanism, \TeX\ does all of its string processing by homegrown methods. Elaborate facilities for dynamic strings are not needed, so all of the necessary operations can be handled with a simple data structure. The array |str_pool| contains all of the (eight-bit) ASCII codes in all of the strings, and the array |str_start| contains indices of the starting points of each string. Strings are referred to by integer numbers, so that string number |s| comprises the characters |str_pool[j]| for |str_start[s] <= j < str_start[s+1]|. Additional integer variables |pool_ptr| and |str_ptr| indicate the number of entries used so far in |str_pool| and |str_start|, respectively; locations |str_pool[pool_ptr]| and |str_start[str_ptr]| are ready for the next string to be allocated. String numbers 0 to 255 are reserved for strings that correspond to single ASCII characters. This is in accordance with the conventions of \.{WEB}, @.WEB@> which converts single-character strings into the ASCII code number of the single character involved, while it converts other strings into integers and builds a string pool file. Thus, when the string constant \.{"."} appears in the program below, \.{WEB} converts it into the integer 46, which is the ASCII code for a period, while \.{WEB} will convert a string like \.{"hello"} into some integer greater than~255. String number 46 will presumably be the single character `\..'; but some ASCII codes have no standard visible representation, and \TeX\ sometimes needs to be able to print an arbitrary ASCII character, so the first 256 strings are used to specify exactly what should be printed for each of the 256 possibilities. Elements of the |str_pool| array must be ASCII codes that can actually be printed; i.e., they must have an |xchr| equivalent in the local character set. (This restriction applies only to preloaded strings, not to those generated dynamically by the user.) Some \PASCAL\ compilers won't pack integers into a single byte unless the integers lie in the range |-128 dotdot 127|. To accommodate such systems we access the string pool only via macros that can easily be redefined. @^system dependencies@> @d si(X) X /*convert from |ASCII_code| to |packed_ASCII_code|*/ @d so(X) X /*convert from |packed_ASCII_code| to |ASCII_code|*/ @= typedef uint16_t pool_pointer; /*for variables that point into |str_pool|*/ typedef uint16_t str_number; /*for variables that point into |str_start|*/ typedef uint8_t packed_ASCII_code; /*elements of |str_pool| array*/ @ @= @@; packed_ASCII_code @!str_pool[pool_size+1]= @<|str_pool| initialization@>; /*the characters*/ pool_pointer @!str_start[max_strings+1]= {@<|str_start| initialization@>}; /*the starting pointers*/ pool_pointer @!pool_ptr=@<|pool_ptr| initialization@>; /*first unused position in |str_pool|*/ str_number @!str_ptr=@<|str_ptr| initialization@>; /*number of the current string being created*/ pool_pointer @!init_pool_ptr; /*the starting value of |pool_ptr|*/ str_number @!init_str_ptr; /*the starting value of |str_ptr|*/ @ Several of the elementary string operations are performed using \.{WEB} macros instead of \PASCAL\ procedures, because many of the operations are done quite frequently and we want to avoid the overhead of procedure calls. For example, here is a simple macro that computes the length of a string. @.WEB@> @d length(X) (str_start[X+1]-str_start[X]) /*the number of characters in string number \#*/ @ The length of the current string is called |cur_length|: @d cur_length (pool_ptr-str_start[str_ptr]) @ Strings are created by appending character codes to |str_pool|. The |append_char| macro, defined here, does not check to see if the value of |pool_ptr| has gotten too high; this test is supposed to be made before |append_char| is used. There is also a |flush_char| macro, which erases the last character appended. To test if there is room to append |l| more characters to |str_pool|, we shall write |str_room(l)|, which aborts \TeX\ and gives an apologetic error message if there isn't enough room. @d append_char(X) /*put |ASCII_code| \# at the end of |str_pool|*/ {@+str_pool[pool_ptr]=si(X);incr(pool_ptr); } @d flush_char decr(pool_ptr) /*forget the last character in the pool*/ @d str_room(X) /*make sure that the pool hasn't overflowed*/ {@+if (pool_ptr+X > pool_size) overflow("pool size", pool_size-init_pool_ptr); @:TeX capacity exceeded pool size}{\quad pool size@> } @ Once a sequence of characters has been appended to |str_pool|, it officially becomes a string when the function |make_string| is called. This function returns the identification number of the new string as its value. @p str_number make_string(void) /*current string enters the pool*/ {@+if (str_ptr==max_strings) overflow("number of strings", max_strings-init_str_ptr); @:TeX capacity exceeded number of strings}{\quad number of strings@> incr(str_ptr);str_start[str_ptr]=pool_ptr; return str_ptr-1; } @ To destroy the most recently made string, we say |flush_string|. @d flush_string {@+decr(str_ptr);pool_ptr=str_start[str_ptr]; } @ The following subroutine compares string |s| with another string of the same length that appears in |buffer| starting at position |k|; the result is |true| if and only if the strings are equal. Empirical tests indicate that |str_eq_buf| is used in such a way that it tends to return |true| about 80 percent of the time. @p bool str_eq_buf(str_number @!s, int @!k) /*test equality of strings*/ {@+ /*loop exit*/ pool_pointer j; /*running index*/ bool @!result; /*result of comparison*/ j=str_start[s]; while (j < str_start[s+1]) {@+if (so(str_pool[j])!=buffer[k]) {@+result=false;goto not_found; } incr(j);incr(k); } result=true; not_found: return result; } @ Here is a similar routine, but it compares two strings in the string pool, and it does not assume that they have the same length. @p bool str_eq_str(str_number @!s, str_number @!t) /*test equality of strings*/ {@+ /*loop exit*/ pool_pointer j, @!k; /*running indices*/ bool @!result; /*result of comparison*/ result=false; if (length(s)!=length(t)) goto not_found; j=str_start[s];k=str_start[t]; while (j < str_start[s+1]) {@+if (str_pool[j]!=str_pool[k]) goto not_found; incr(j);incr(k); } result=true; not_found: return result; } @ The initial values of |str_pool|, |str_start|, |pool_ptr|, and |str_ptr| are computed by the \.{INITEX} program, based in part on the information that \.{WEB} has output while processing \TeX. @.INITEX@> @^string pool@> @p #ifdef @!INIT bool get_strings_started(void) /*initializes the string pool, but returns |false| if something goes wrong*/ {@+ int k, @!l; /*small indices or counters*/ uint8_t @!m, @!n; /*characters input from |pool_file|*/ str_number @!g; /*garbage*/ int @!a; /*accumulator for check sum*/ bool @!c; /*check sum has been checked*/ pool_ptr=0;str_ptr=0;str_start[0]=0; @; @; } #endif @ @d app_lc_hex(X) l=X; if (l < 10) append_char(l+'0')@;@+else append_char(l-10+'a') @= for (k=0; k<=255; k++) { if (@[@@]) { append_char('^');append_char('^'); if (k < 0100) append_char(k+0100)@; else if (k < 0200) append_char(k-0100)@; else{ app_lc_hex(k/16);app_lc_hex(k%16); } } else append_char(k); g=make_string(); } @ The first 128 strings will contain 95 standard ASCII characters, and the other 33 characters will be printed in three-symbol form like `\.{\^\^A}' unless a system-dependent change is made here. Installations that have an extended character set, where for example |xchr[032]==@t\.{\'^^Z\'}@>|, would like string 032 to be the single character 032 instead of the three characters 0136, 0136, 0132 (\.{\^\^Z}). On the other hand, even people with an extended character set will want to represent string 015 by \.{\^\^M}, since 015 is |carriage_return|; the idea is to produce visible strings instead of tabs or line-feeds or carriage-returns or bell-rings or characters that are treated anomalously in text files. Unprintable characters of codes 128--255 are, similarly, rendered \.{\^\^80}--\.{\^\^ff}. The boolean expression defined here should be |true| unless \TeX\ internal code number~|k| corresponds to a non-troublesome visible symbol in the local character set. An appropriate formula for the extended character set recommended in {\sl The \TeX book\/} would, for example, be `|k in[0, 010 dotdot 012, 014, 015, 033, 0177 dotdot 0377]|'. If character |k| cannot be printed, and |k < 0200|, then character |k+0100| or |k-0100| must be printable; moreover, ASCII codes |[041 dotdot 046, 060 dotdot 071, 0136, 0141 dotdot 0146, 0160 dotdot 0171]| must be printable. Thus, at least 81 printable characters are needed. @:TeXbook}{\sl The \TeX book@> @^character set dependencies@> @^system dependencies@> @= (k < ' ')||(k > '~') @ When the \.{WEB} system program called \.{TANGLE} processes the \.{TEX.WEB} description that you are now reading, it outputs the \PASCAL\ program \.{TEX.PAS} and also a string pool file called \.{TEX.POOL}. The \.{INITEX} @.WEB@>@.INITEX@> program reads the latter file, where each string appears as a two-digit decimal length followed by the string itself, and the information is recorded in \TeX's string memory. @= #ifdef @!INIT alpha_file @!pool_file; /*the string-pool file output by \.{TANGLE}*/ #endif @ @d bad_pool(X) {@+wake_up_terminal;write_ln(term_out, X); a_close(&pool_file);return false; } @= {@+int k;@+for(k=1; k<=file_name_size;k++)name_of_file[k]=pool_name[k-1];@+} /*we needn't set |name_length|*/ if (a_open_in(&pool_file)) {@+c=false; @/do@+{@; }@+ while (!(c)); a_close(&pool_file);return true; } else bad_pool("! I can't read TEX.POOL.") @.I can't read TEX.POOL@> @ @= {@+if (eof(pool_file)) bad_pool("! TEX.POOL has no check sum."); @.TEX.POOL has no check sum@> read(pool_file, m);@+read(pool_file, n); /*read two digits of string length*/ if (m== '*' ) @@; else{@+if ((xord[m] < '0')||(xord[m] > '9')||@| (xord[n] < '0')||(xord[n] > '9')) bad_pool("! TEX.POOL line doesn't begin with two digits."); @.TEX.POOL line doesn't...@> l=xord[m]*10+xord[n]-'0'*11; /*compute the length*/ if (pool_ptr+l+string_vacancies > pool_size) bad_pool("! You have to increase POOLSIZE."); @.You have to increase POOLSIZE@> for (k=1; k<=l; k++) {@+if (eoln(pool_file)) m= ' ' ;@+else read(pool_file, m); append_char(xord[m]); } read_ln(pool_file);g=make_string(); } } @ The \.{WEB} operation \.{@@\$} denotes the value that should be at the end of this \.{TEX.POOL} file; any other value means that the wrong pool file has been loaded. @^check sum@> @= {@+a=0;k=1; loop@+{@+if ((xord[n] < '0')||(xord[n] > '9')) bad_pool("! TEX.POOL check sum doesn't have nine digits."); @.TEX.POOL check sum...@> a=10*a+xord[n]-'0'; if (k==9) goto done; incr(k);read(pool_file, n); } done: if (a!=0) bad_pool("! TEX.POOL doesn't match; TANGLE me again."); @.TEX.POOL doesn't match@> c=true; } @* On-line and off-line printing. Messages that are sent to a user's terminal and to the transcript-log file are produced by several `|print|' procedures. These procedures will direct their output to a variety of places, based on the setting of the global variable |selector|, which has the following possible values: \yskip \hang |term_and_log|, the normal setting, prints on the terminal and on the transcript file. \hang |log_only|, prints only on the transcript file. \hang |term_only|, prints only on the terminal. \hang |no_print|, doesn't print at all. This is used only in rare cases before the transcript file is open. \hang |pseudo|, puts output into a cyclic buffer that is used by the |show_context| routine; when we get to that routine we shall discuss the reasoning behind this curious mode. \hang |new_string|, appends the output to the current string in the string pool. \hang 0 to 15, prints on one of the sixteen files for \.{\\write} output. \yskip \noindent The symbolic names `|term_and_log|', etc., have been assigned numeric codes that satisfy the convenient relations |no_print+1==term_only|, |no_print+2==log_only|, |term_only+2==log_only+1==term_and_log|. Three additional global variables, |tally| and |term_offset| and |file_offset|, record the number of characters that have been printed since they were most recently cleared to zero. We use |tally| to record the length of (possibly very long) stretches of printing; |term_offset| and |file_offset|, on the other hand, keep track of how many characters have appeared so far on the current line that has been output to the terminal or to the transcript file, respectively. @d no_print 16 /*|selector| setting that makes data disappear*/ @d term_only 17 /*printing is destined for the terminal only*/ @d log_only 18 /*printing is destined for the transcript file only*/ @d term_and_log 19 /*normal |selector| setting*/ @d pseudo 20 /*special |selector| setting for |show_context|*/ @d new_string 21 /*printing is deflected to the string pool*/ @d max_selector 21 /*highest selector setting*/ @= alpha_file @!log_file; /*transcript of \TeX\ session*/ uint8_t @!selector; /*where to print a message*/ uint8_t @!dig[23]; /*digits in a number being output*/ int @!tally; /*the number of characters recently printed*/ uint8_t @!term_offset; /*the number of characters on the current terminal line*/ uint8_t @!file_offset; /*the number of characters on the current file line*/ ASCII_code @!trick_buf[error_line+1]; /*circular buffer for pseudoprinting*/ int @!trick_count; /*threshold for pseudoprinting, explained later*/ int @!first_count; /*another variable for pseudoprinting*/ @ @= selector=term_only;tally=0;term_offset=0;file_offset=0; @ Macro abbreviations for output to the terminal and to the log file are defined here for convenience. Some systems need special conventions for terminal output, and it is possible to adhere to those conventions by changing |wterm|, |wterm_ln|, and |wterm_cr| in this section. @^system dependencies@> @= #define put(file) @[fwrite(&((file).d),sizeof((file).d),1,(file).f)@] #define get(file) @[fread(&((file).d),sizeof((file).d),1,(file).f)@] #define reset(file,name,mode) @[((file).f=fopen((char *)(name)+1,mode),\ (file).f!=NULL?get(file):0)@] #define rewrite(file,name,mode) @[((file).f=fopen((char *)(name)+1,mode))@] #define close(file) @[fclose((file).f)@] #define eof(file) @[feof((file).f)@] #define eoln(file) @[((file).d=='\n'||eof(file))@] #define erstat(file) @[((file).f==NULL?-1:ferror((file).f))@] #define read(file,x) @[((x)=(file).d,get(file))@] #define read_ln(file) @[do get(file); while (!eoln(file))@] #define write(file, format,...) @[fprintf(file.f,format,## __VA_ARGS__)@] #define write_ln(file,...) @[write(file,__VA_ARGS__"\n")@] #define wterm(format,...) @[write(term_out,format, ## __VA_ARGS__)@] #define wterm_ln(format,...) @[wterm(format "\n", ## __VA_ARGS__)@] #define wterm_cr @[write(term_out,"\n")@] #define wlog(format, ...) @[write(log_file,format, ## __VA_ARGS__)@] #define wlog_ln(format, ...) @[wlog(format "\n", ## __VA_ARGS__)@] #define wlog_cr @[write(log_file,"\n")@] @ To end a line of text output, we call |print_ln|. @= void print_ln(void) /*prints an end-of-line*/ {@+switch (selector) { case term_and_log: {@+wterm_cr;wlog_cr; term_offset=0;file_offset=0; } @+break; case log_only: {@+wlog_cr;file_offset=0; } @+break; case term_only: {@+wterm_cr;term_offset=0; } @+break; case no_print: case pseudo: case new_string: do_nothing;@+break; default:write_ln(write_file[selector]); } @/ } /*|tally| is not affected*/ @ The |print_char| procedure sends one character to the desired destination, using the |xchr| array to map it into an external character compatible with |input_ln|. All printing comes through |print_ln| or |print_char|. @= void print_char(ASCII_code @!s) /*prints a single character*/ {@+ if (@) if (selector < pseudo) {@+print_ln();return; } switch (selector) { case term_and_log: {@+wterm("%c",xchr[s]);wlog("%c",xchr[s]); incr(term_offset);incr(file_offset); if (term_offset==max_print_line) {@+wterm_cr;term_offset=0; } if (file_offset==max_print_line) {@+wlog_cr;file_offset=0; } } @+break; case log_only: {@+wlog("%c",xchr[s]);incr(file_offset); if (file_offset==max_print_line) print_ln(); } @+break; case term_only: {@+wterm("%c",xchr[s]);incr(term_offset); if (term_offset==max_print_line) print_ln(); } @+break; case no_print: do_nothing;@+break; case pseudo: if (tally < trick_count) trick_buf[tally%error_line]=s;@+break; case new_string: {@+if (pool_ptr < pool_size) append_char(s); } @+break; /*we drop characters if the string space is full*/ default:write(write_file[selector],"%c", xchr[s]); } @/ incr(tally); } @ An entire string is output by calling |print|. Note that if we are outputting the single standard ASCII character \.c, we could call |print('c')|, since |'c'==99| is the number of a single-character string, as explained above. But |print_char('c')| is quicker, so \TeX\ goes directly to the |print_char| routine when it knows that this is safe. (The present implementation assumes that it is always safe to print a visible ASCII character.) @^system dependencies@> @= void print(int @!s) /*prints string |s|*/ {@+ pool_pointer j; /*current character code position*/ int @!nl; /*new-line character to restore*/ if (s >= str_ptr) s=@[@<|"???"|@>@]; /*this can't happen*/ @.???@> else if (s < 256) if (s < 0) s=@[@<|"???"|@>@]; /*can't happen*/ else{@+if (selector > pseudo) {@+print_char(s);return; /*internal strings are not expanded*/ } if ((@)) if (selector < pseudo) {@+print_ln();return; } nl=new_line_char;new_line_char=-1; /*temporarily disable new-line character*/ j=str_start[s]; while (j < str_start[s+1]) {@+print_char(so(str_pool[j]));incr(j); } new_line_char=nl;return; } j=str_start[s]; while (j < str_start[s+1]) {@+print_char(so(str_pool[j]));incr(j); } } void print_str(char *s) /* the simple version */ {while (*s!=0) print_char(*s++);@+ } @ Control sequence names, file names, and strings constructed with \.{\\string} might contain |ASCII_code| values that can't be printed using |print_char|. Therefore we use |slow_print| for them: @= void slow_print(int @!s) /*prints string |s|*/ {@+pool_pointer j; /*current character code position*/ if ((s >= str_ptr)||(s < 256)) print(s); else{@+j=str_start[s]; while (j < str_start[s+1]) {@+print(so(str_pool[j]));incr(j); } } } @ Here is the very first thing that \TeX\ prints: a headline that identifies the version number and format package. The |term_offset| variable is temporarily incorrect, but the discrepancy is not serious since we assume that the banner and format identifier together will occupy at most |max_print_line| character positions. @= wterm("%s",banner); if (format_ident==0) wterm_ln(" (no format preloaded)"); else{@+slow_print(format_ident);print_ln(); } update_terminal; @ The procedure |print_nl| is like |print|, but it makes sure that the string appears at the beginning of a new line. @= void print_nl(char *@!s) /*prints string |s| at beginning of line*/ {@+if (((term_offset > 0)&&(odd(selector)))||@| ((file_offset > 0)&&(selector >= log_only))) print_ln(); print_str(s); } @ The procedure |print_esc| prints a string that is preceded by the user's escape character (which is usually a backslash). @= void print_esc(str_number @!s) /*prints escape character, then |s|*/ {@+int c; /*the escape character code*/ @; if (c >= 0) if (c < 256) print(c); slow_print(s); } @ An array of digits in the range |0 dotdot 15| is printed by |print_the_digs|. @= void print_the_digs(eight_bits @!k) /*prints |dig[k-1]|$\,\ldots\,$|dig[0]|*/ {@+while (k > 0) {@+decr(k); if (dig[k] < 10) print_char('0'+dig[k]); else print_char('A'-10+dig[k]); } } @ The following procedure, which prints out the decimal representation of a given integer |n|, has been written carefully so that it works properly if |n==0| or if |(-n)| would cause overflow. It does not apply |%| or |/| to negative arguments, since such operations are not implemented consistently by all \PASCAL\ compilers. @= void print_int(int @!n) /*prints an integer in decimal form*/ {@+uint8_t k; /*index to current digit; we assume that $|n|<10^{23}$*/ int @!m; /*used to negate |n| in possibly dangerous cases*/ k=0; if (n < 0) {@+print_char('-'); if (n > -100000000) negate(n); else{@+m=-1-n;n=m/10;m=(m%10)+1;k=1; if (m < 10) dig[0]=m; else{@+dig[0]=0;incr(n); } } } @/do@+{dig[k]=n%10;n=n/10;incr(k); }@+ while (!(n==0)); print_the_digs(k); } @ Here is a trivial procedure to print two digits; it is usually called with a parameter in the range |0 <= n <= 99|. @p void print_two(int @!n) /*prints two least significant digits*/ {@+n=abs(n)%100;print_char('0'+(n/10)); print_char('0'+(n%10)); } @ Hexadecimal printing of nonnegative integers is accomplished by |print_hex|. @p void print_hex(int @!n) /*prints a positive integer in hexadecimal form*/ {@+uint8_t k; /*index to current digit; we assume that $0\L n<16^{22}$*/ k=0;print_char('"'); @/do@+{dig[k]=n%16;n=n/16;incr(k); }@+ while (!(n==0)); print_the_digs(k); } @ Old versions of \TeX\ needed a procedure called |print_ASCII| whose function is now subsumed by |print|. We retain the old name here as a possible aid to future software arch\ae ologists. @d print_ASCII print @ Roman numerals are produced by the |print_roman_int| routine. Readers who like puzzles might enjoy trying to figure out how this tricky code works; therefore no explanation will be given. Notice that 1990 yields \.{mcmxc}, not \.{mxm}. @p void print_roman_int(int @!n) {@+ pool_pointer j, @!k; /*mysterious indices into |str_pool|*/ nonnegative_integer @!u, @!v; /*mysterious numbers*/ j=str_start[@[@<|"m2d5c2l5x2v5i"|@>@]];v=1000; loop@+{@+while (n >= v) {@+print_char(so(str_pool[j]));n=n-v; } if (n <= 0) return; /*nonpositive input produces no output*/ k=j+2;u=v/(so(str_pool[k-1])-'0'); if (str_pool[k-1]==si('2')) {@+k=k+2;u=u/(so(str_pool[k-1])-'0'); } if (n+u >= v) {@+print_char(so(str_pool[k]));n=n+u; } else{@+j=j+2;v=v/(so(str_pool[j-1])-'0'); } } } @ The |print| subroutine will not print a string that is still being created. The following procedure will. @p void print_current_string(void) /*prints a yet-unmade string*/ {@+pool_pointer j; /*points to current character code*/ j=str_start[str_ptr]; while (j < pool_ptr) {@+print_char(so(str_pool[j]));incr(j); } } @ Here is a procedure that asks the user to type a line of input, assuming that the |selector| setting is either |term_only| or |term_and_log|. The input is placed into locations |first| through |last-1| of the |buffer| array, and echoed on the transcript file if appropriate. This procedure is never called when |interaction < scroll_mode|. @d prompt_input(X) {@+wake_up_terminal;print_str(X);term_input(); } /*prints a string and gets a line of input*/ @p void term_input(void) /*gets a line from the terminal*/ {@+int k; /*index into |buffer|*/ update_terminal; /*now the user sees the prompt for sure*/ if (!input_ln(&term_in, true)) fatal_error("End of file on the terminal!"); @.End of file on the terminal@> term_offset=0; /*the user's line ended with \<\rm return>*/ decr(selector); /*prepare to echo the input*/ if (last!=first) for (k=first; k<=last-1; k++) print(buffer[k]); print_ln();incr(selector); /*restore previous status*/ } @* Reporting errors. When something anomalous is detected, \TeX\ typically does something like this: $$\vbox{\halign{#\hfil\cr |print_err("Something anomalous has been detected");|\cr |help3("This is the first line of my offer to help.")|\cr |("This is the second line. I'm trying to")|\cr |("explain the best way for you to proceed.");|\cr |error;|\cr}}$$ A two-line help message would be given using |help2|, etc.; these informal helps should use simple vocabulary that complements the words used in the official error message that was printed. (Outside the U.S.A., the help messages should preferably be translated into the local vernacular. Each line of help is at most 60 characters long, in the present implementation, so that |max_print_line| will not be exceeded.) The |print_err| procedure supplies a `\.!' before the official message, and makes sure that the terminal is awake if a stop is going to occur. The |error| procedure supplies a `\..' after the official message, then it shows the location of the error; and if |interaction==error_stop_mode|, it also enters into a dialog with the user, during which time the help message may be printed. @^system dependencies@> @ The global variable |interaction| has four settings, representing increasing amounts of user interaction: @d batch_mode 0 /*omits all stops and omits terminal output*/ @d nonstop_mode 1 /*omits all stops*/ @d scroll_mode 2 /*omits error stops*/ @d error_stop_mode 3 /*stops at every opportunity to interact*/ @d print_err(X) {@+if (interaction==error_stop_mode) wake_up_terminal; print_nl("! ");print_str(X); } @= uint8_t @!interaction; /*current level of interaction*/ @ @=interaction=error_stop_mode; @ \TeX\ is careful not to call |error| when the print |selector| setting might be unusual. The only possible values of |selector| at the time of error messages are \yskip\hang|no_print| (when |interaction==batch_mode| and |log_file| not yet open); \hang|term_only| (when |interaction > batch_mode| and |log_file| not yet open); \hang|log_only| (when |interaction==batch_mode| and |log_file| is open); \hang|term_and_log| (when |interaction > batch_mode| and |log_file| is open). @= if (interaction==batch_mode) selector=no_print;@+else selector=term_only @ A global variable |deletions_allowed| is set |false| if the |get_next| routine is active when |error| is called; this ensures that |get_next| and related routines like |get_token| will never be called recursively. A similar interlock is provided by |set_box_allowed|. @^recursion@> The global variable |history| records the worst level of error that has been detected. It has four possible values: |spotless|, |warning_issued|, |error_message_issued|, and |fatal_error_stop|. Another global variable, |error_count|, is increased by one when an |error| occurs without an interactive dialog, and it is reset to zero at the end of every paragraph. If |error_count| reaches 100, \TeX\ decides that there is no point in continuing further. @d spotless 0 /*|history| value when nothing has been amiss yet*/ @d warning_issued 1 /*|history| value when |begin_diagnostic| has been called*/ @d error_message_issued 2 /*|history| value when |error| has been called*/ @d fatal_error_stop 3 /*|history| value when termination was premature*/ @= bool @!deletions_allowed; /*is it safe for |error| to call |get_token|?*/ bool @!set_box_allowed; /*is it safe to do a \.{\\setbox} assignment?*/ uint8_t @!history; /*has the source input been clean so far?*/ int8_t @!error_count; /*the number of scrolled errors since the last paragraph ended*/ @ The value of |history| is initially |fatal_error_stop|, but it will be changed to |spotless| if \TeX\ survives the initialization process. @= deletions_allowed=true;set_box_allowed=true; error_count=0; /*|history| is initialized elsewhere*/ @ Since errors can be detected almost anywhere in \TeX, we want to declare the error procedures near the beginning of the program. But the error procedures in turn use some other procedures, which need to be declared |forward| before we get to |error| itself. It is possible for |error| to be called recursively if some error arises when |get_token| is being used to delete a token, and/or if some fatal error occurs while \TeX\ is trying to fix a non-fatal one. But such recursion @^recursion@> is never more than two levels deep. @= void normalize_selector(void);@/ void get_token(void);@/ void term_input(void);@/ void show_context(void);@/ void begin_file_reading(void);@/ void open_log_file(void);@/ void close_files_and_terminate(void);@/ void clear_for_error_prompt(void);@/ void give_err_help(void);@/ #ifdef @!DEBUG void debug_help(void); #else #define debug_help() do_nothing #endif @ Individual lines of help are recorded in the array |help_line|, which contains entries in positions |0 dotdot(help_ptr-1)|. They should be printed in reverse order, i.e., with |help_line[0]| appearing last. @d hlp1(X) help_line[0]=X;@+} @d hlp2(X) help_line[1]=X;hlp1 @d hlp3(X) help_line[2]=X;hlp2 @d hlp4(X) help_line[3]=X;hlp3 @d hlp5(X) help_line[4]=X;hlp4 @d hlp6(X) help_line[5]=X;hlp5 @d help0 help_ptr=0 /*sometimes there might be no help*/ @d help1 @+{@+help_ptr=1;hlp1 /*use this with one help line*/ @d help2 @+{@+help_ptr=2;hlp2 /*use this with two help lines*/ @d help3 @+{@+help_ptr=3;hlp3 /*use this with three help lines*/ @d help4 @+{@+help_ptr=4;hlp4 /*use this with four help lines*/ @d help5 @+{@+help_ptr=5;hlp5 /*use this with five help lines*/ @d help6 @+{@+help_ptr=6;hlp6 /*use this with six help lines*/ @= char *@!help_line[6]; /*helps for the next |error|*/ uint8_t @!help_ptr; /*the number of help lines present*/ bool @!use_err_help; /*should the |err_help| list be shown?*/ @ @= help_ptr=0;use_err_help=false; @ The |jump_out| procedure just cuts across all active procedure levels and goes to |end_of_TEX|. This is the only nontrivial |@!goto| statement in the whole program. It is used when there is no recovery from a particular error. Some \PASCAL\ compilers do not implement non-local |goto| statements. @^system dependencies@> In such cases the body of |jump_out| should simply be `|close_files_and_terminate|;\thinspace' followed by a call on some system procedure that quietly terminates the program. @= void jump_out(void) {@+ close_files_and_terminate(); exit(0); } @ Here now is the general |error| routine. @= void error(void) /*completes the job of error reporting*/ {@+ ASCII_code c; /*what the user types*/ int @!s1, @!s2, @!s3, @!s4; /*used to save global variables when deleting tokens*/ if (history < error_message_issued) history=error_message_issued; print_char('.');show_context(); if (interaction==error_stop_mode) @; incr(error_count); if (error_count==100) {@+print_nl("(That makes 100 errors; please try again.)"); @.That makes 100 errors...@> history=fatal_error_stop;jump_out(); } @; } @ @= loop@+{@+resume: clear_for_error_prompt();prompt_input("? "); @.?\relax@> if (last==first) return; c=buffer[first]; if (c >= 'a') c=c+'A'-'a'; /*convert to uppercase*/ @; } @ It is desirable to provide an `\.E' option here that gives the user an easy way to return from \TeX\ to the system editor, with the offending line ready to be edited. But such an extension requires some system wizardry, so the present implementation simply types out the name of the file that should be edited and the relevant line number. @^system dependencies@> There is a secret `\.D' option available when the debugging routines haven't been commented~out. @^debugging@> @= switch (c) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': if (deletions_allowed) @@;@+break; @t\4\4@>@; #ifdef @!DEBUG case 'D': {@+debug_help();goto resume;@+} #endif case 'E': if (base_ptr > 0) {@+print_nl("You want to edit file "); @.You want to edit file x@> slow_print(input_stack[base_ptr].name_field); print_str(" at line ");print_int(line); interaction=scroll_mode;jump_out(); } @+break; case 'H': @@; case 'I': @@; case 'Q': case 'R': case 'S': @@; case 'X': {@+interaction=scroll_mode;jump_out(); } @+break; default:do_nothing; } @/ @@; @ @= {@+print_str("Type to proceed, S to scroll future error messages,");@/ @.Type to proceed...@> print_nl("R to run without stopping, Q to run quietly,");@/ print_nl("I to insert something, "); if (base_ptr > 0) print_str("E to edit your file,"); if (deletions_allowed) print_nl("1 or ... or 9 to ignore the next 1 to 9 tokens of input,"); print_nl("H for help, X to quit."); } @ Here the author of \TeX\ apologizes for making use of the numerical relation between |'Q'|, |'R'|, |'S'|, and the desired interaction settings |batch_mode|, |nonstop_mode|, |scroll_mode|. @^Knuth, Donald Ervin@> @= {@+error_count=0;interaction=batch_mode+c-'Q'; print_str("OK, entering "); switch (c) { case 'Q': {@+print_esc(@[@<|"batchmode"|@>@]);decr(selector); } @+break; case 'R': print_esc(@[@<|"nonstopmode"|@>@]);@+break; case 'S': print_esc(@[@<|"scrollmode"|@>@]); } /*there are no other cases*/ print_str("...");print_ln();update_terminal;return; } @ When the following code is executed, |buffer[(first+1)dotdot(last-1)]| may contain the material inserted by the user; otherwise another prompt will be given. In order to understand this part of the program fully, you need to be familiar with \TeX's input stacks. @= {@+begin_file_reading(); /*enter a new syntactic level for terminal input*/ /*now |state==mid_line|, so an initial blank space will count as a blank*/ if (last > first+1) {@+loc=first+1;buffer[first]=' '; } else{@+prompt_input("insert>");loc=first; @.insert>@> } first=last; cur_input.limit_field=last-1; /*no |end_line_char| ends this line*/ return; } @ We allow deletion of up to 99 tokens at a time. @= {@+s1=cur_tok;s2=cur_cmd;s3=cur_chr;s4=align_state; align_state=1000000;OK_to_interrupt=false; if ((last > first+1)&&(buffer[first+1] >= '0')&&(buffer[first+1] <= '9')) c=c*10+buffer[first+1]-'0'*11; else c=c-'0'; while (c > 0) {@+get_token(); /*one-level recursive call of |error| is possible*/ decr(c); } cur_tok=s1;cur_cmd=s2;cur_chr=s3;align_state=s4;OK_to_interrupt=true; help2("I have just deleted some text, as you asked.")@/ ("You can now delete more, or insert, or whatever."); show_context();goto resume; } @ @= {@+if (use_err_help) {@+give_err_help();use_err_help=false; } else{@+if (help_ptr==0) help2("Sorry, I don't know how to help in this situation.")@/ @t\kern1em@>("Maybe you should try asking a human?"); @/do@+{decr(help_ptr);print_str(help_line[help_ptr]);print_ln(); }@+ while (!(help_ptr==0)); } help4("Sorry, I already gave what help I could...")@/ ("Maybe you should try asking a human?")@/ ("An error might have occurred before I noticed any problems.")@/ ("``If all else fails, read the instructions.'");@/ goto resume; } @ @= if (interaction > batch_mode) decr(selector); /*avoid terminal output*/ if (use_err_help) {@+print_ln();give_err_help(); } else while (help_ptr > 0) {@+decr(help_ptr);print_nl(help_line[help_ptr]); } print_ln(); if (interaction > batch_mode) incr(selector); /*re-enable terminal output*/ print_ln() @ A dozen or so error messages end with a parenthesized integer, so we save a teeny bit of program space by declaring the following procedure: @p void int_error(int @!n) {@+print_str(" (");print_int(n);print_char(')');error(); } @ In anomalous cases, the print selector might be in an unknown state; the following subroutine is called to fix things just enough to keep running a bit longer. @p void normalize_selector(void) {@+if (log_opened) selector=term_and_log; else selector=term_only; if (job_name==0) open_log_file(); if (interaction==batch_mode) decr(selector); } @ The following procedure prints \TeX's last words before dying. @d succumb {@+if (interaction==error_stop_mode) interaction=scroll_mode; /*no more interaction*/ if (log_opened) error(); if (interaction > batch_mode) debug_help(); history=fatal_error_stop;jump_out(); /*irrecoverable error*/ } @= void fatal_error(char *@!s) /*prints |s|, and that's it*/ {@+normalize_selector();@/ print_err("Emergency stop");help1(s);succumb; @.Emergency stop@> } @ Here is the most dreaded error message. @= void overflow(char *@!s, int @!n) /*stop due to finiteness*/ {@+normalize_selector(); print_err("TeX capacity exceeded, sorry ["); @.TeX capacity exceeded ...@> print_str(s);print_char('=');print_int(n);print_char(']'); help2("If you really absolutely need more capacity,")@/ ("you can ask a wizard to enlarge me."); succumb; } @ The program might sometime run completely amok, at which point there is no choice but to stop. If no previous error has been detected, that's bad news; a message is printed that is really intended for the \TeX\ maintenance person instead of the user (unless the user has been particularly diabolical). The index entries for `this can't happen' may help to pinpoint the problem. @^dry rot@> @= void confusion(str_number @!s) /*consistency check violated; |s| tells where*/ {@+normalize_selector(); if (history < error_message_issued) {@+print_err("This can't happen (");print(s);print_char(')'); @.This can't happen@> help1("I'm broken. Please show this to someone who can fix can fix"); } else{@+print_err("I can't go on meeting you like this"); @.I can't go on...@> help2("One of your faux pas seems to have wounded me deeply...")@/ ("in fact, I'm barely conscious. Please fix it and try again."); } succumb; } @ Users occasionally want to interrupt \TeX\ while it's running. If the \PASCAL\ runtime system allows this, one can implement a routine that sets the global variable |interrupt| to some nonzero value when such an interrupt is signalled. Otherwise there is probably at least a way to make |interrupt| nonzero using the \PASCAL\ debugger. @^system dependencies@> @^debugging@> @d check_interrupt {@+if (interrupt!=0) pause_for_instructions(); } @= int @!interrupt; /*should \TeX\ pause for instructions?*/ bool @!OK_to_interrupt; /*should interrupts be observed?*/ @ @= interrupt=0;OK_to_interrupt=true; @ When an interrupt has been detected, the program goes into its highest interaction level and lets the user have nearly the full flexibility of the |error| routine. \TeX\ checks for interrupts only at times when it is safe to do this. @p void pause_for_instructions(void) {@+if (OK_to_interrupt) {@+interaction=error_stop_mode; if ((selector==log_only)||(selector==no_print)) incr(selector); print_err("Interruption"); @.Interruption@> help3("You rang?")@/ ("Try to insert some instructions for me (e.g.,`I\\showlists'),")@/ ("unless you just want to quit by typing `X'."); deletions_allowed=false;error();deletions_allowed=true; interrupt=0; } } @* Arithmetic with scaled dimensions. The principal computations performed by \TeX\ are done entirely in terms of integers less than $2^{31}$ in magnitude; and divisions are done only when both dividend and divisor are nonnegative. Thus, the arithmetic specified in this program can be carried out in exactly the same way on a wide variety of computers, including some small ones. Why? Because the arithmetic calculations need to be spelled out precisely in order to guarantee that \TeX\ will produce identical output on different machines. If some quantities were rounded differently in different implementations, we would find that line breaks and even page breaks might occur in different places. Hence the arithmetic of \TeX\ has been designed with care, and systems that claim to be implementations of \TeX82 should follow precisely the @:TeX82}{\TeX82@> calculations as they appear in the present program. (Actually there are three places where \TeX\ uses |/| with a possibly negative numerator. These are harmless; see |/| in the index. Also if the user sets the \.{\\time} or the \.{\\year} to a negative value, some diagnostic information will involve negative-numerator division. The same remarks apply for |%| as well as for |/|.) @ Here is a routine that calculates half of an integer, using an unambiguous convention with respect to signed odd numbers. @p int half(int @!x) {@+if (odd(x)) return(x+1)/2; else return x/2; } @ Fixed-point arithmetic is done on {\sl scaled integers\/} that are multiples of $2^{-16}$. In other words, a binary point is assumed to be sixteen bit positions from the right end of a binary computer word. @d unity 0200000 /*$2^{16}$, represents 1.00000*/ @d two 0400000 /*$2^{17}$, represents 2.00000*/ @= typedef int scaled; /*this type is used for scaled integers*/ typedef uint32_t nonnegative_integer; /*$0\L x<2^{31}$*/ typedef uint8_t small_number; /*this type is self-explanatory*/ @ The following function is used to create a scaled integer from a given decimal fraction $(.d_0d_1\ldots d_{k-1})$, where |0 <= k <= 17|. The digit $d_i$ is given in |dig[i]|, and the calculation produces a correctly rounded result. @p scaled round_decimals(small_number @!k) /*converts a decimal fraction*/ {@+int a; /*the accumulator*/ a=0; while (k > 0) {@+decr(k);a=(a+dig[k]*two)/10; } return(a+1)/2; } @ Conversely, here is a procedure analogous to |print_int|. If the output of this procedure is subsequently read by \TeX\ and converted by the |round_decimals| routine above, it turns out that the original value will be reproduced exactly; the ``simplest'' such decimal number is output, but there is always at least one digit following the decimal point. The invariant relation in the \&{repeat} loop is that a sequence of decimal digits yet to be printed will yield the original number if and only if they form a fraction~$f$ in the range $s-\delta\L10\cdot2^{16}f unity) s=s+0100000-50000; /*round the last digit*/ print_char('0'+(s/unity));s=10*(s%unity);delta=delta*10; }@+ while (!(s <= delta)); } @ Physical sizes that a \TeX\ user specifies for portions of documents are represented internally as scaled points. Thus, if we define an `sp' (scaled @^sp@> point) as a unit equal to $2^{-16}$ printer's points, every dimension inside of \TeX\ is an integer number of sp. There are exactly 4,736,286.72 sp per inch. Users are not allowed to specify dimensions larger than $2^{30}-1$ sp, which is a distance of about 18.892 feet (5.7583 meters); two such quantities can be added without overflow on a 32-bit computer. The present implementation of \TeX\ does not check for overflow when @^overflow in arithmetic@> dimensions are added or subtracted. This could be done by inserting a few dozen tests of the form `\ignorespaces|if (x >= 010000000000)@/ @t\\{report\_overflow}@>|', but the chance of overflow is so remote that such tests do not seem worthwhile. \TeX\ needs to do only a few arithmetic operations on scaled quantities, other than addition and subtraction, and the following subroutines do most of the work. A single computation might use several subroutine calls, and it is desirable to avoid producing multiple error messages in case of arithmetic overflow; so the routines set the global variable |arith_error| to |true| instead of reporting errors directly to the user. Another global variable, |rem|, holds the remainder after a division. @= bool @!arith_error; /*has arithmetic overflow occurred recently?*/ scaled @!rem; /*amount subtracted to get an exact division*/ @ The first arithmetical subroutine we need computes $nx+y$, where |x| and~|y| are |scaled| and |n| is an integer. We will also use it to multiply integers. @d nx_plus_y(X, Y, Z) mult_and_add(X, Y, Z, 07777777777) @d mult_integers(X, Y) mult_and_add(X, Y, 0, 017777777777) @p scaled mult_and_add(int @!n, scaled @!x, scaled @!y, scaled @!max_answer) {@+if (n < 0) {@+negate(x);negate(n); } if (n==0) return y; else if (((x <= (max_answer-y)/n)&&(-x <= (max_answer+y)/n))) return n*x+y; else{@+arith_error=true;return 0; } } @ We also need to divide scaled dimensions by integers. @p scaled x_over_n(scaled @!x, int @!n) {@+bool negative; /*should |rem| be negated?*/ scaled x_over_n; negative=false; if (n==0) {@+arith_error=true;x_over_n=0;rem=x; } else{@+if (n < 0) {@+negate(x);negate(n);negative=true; } if (x >= 0) {@+x_over_n=x/n;rem=x%n; } else{@+x_over_n=-((-x)/n);rem=-((-x)%n); } } if (negative) negate(rem); return x_over_n;} @ Then comes the multiplication of a scaled number by a fraction |n/(double)d|, where |n| and |d| are nonnegative integers | <= @t$2^{16}$@>| and |d| is positive. It would be too dangerous to multiply by~|n| and then divide by~|d|, in separate operations, since overflow might well occur; and it would be too inaccurate to divide by |d| and then multiply by |n|. Hence this subroutine simulates 1.5-precision arithmetic. @p scaled xn_over_d(scaled @!x, int @!n, int @!d) {@+bool positive; /*was |x >= 0|?*/ nonnegative_integer @!t, @!u, @!v; /*intermediate quantities*/ scaled xn_over_d; if (x >= 0) positive=true; else{@+negate(x);positive=false; } t=(x%0100000)*n; u=(x/0100000)*n+(t/0100000); v=(u%d)*0100000+(t%0100000); if (u/d >= 0100000) arith_error=true; else u=0100000*(u/d)+(v/d); if (positive) {@+xn_over_d=u;rem=v%d; } else{@+xn_over_d=-u;rem=-(v%d); } return xn_over_d;} @ The next subroutine is used to compute the ``badness'' of glue, when a total~|t| is supposed to be made from amounts that sum to~|s|. According to {\sl The \TeX book}, the badness of this situation is $100(t/s)^3$; however, badness is simply a heuristic, so we need not squeeze out the last drop of accuracy when computing it. All we really want is an approximation that has similar properties. @:TeXbook}{\sl The \TeX book@> The actual method used to compute the badness is easier to read from the program than to describe in words. It produces an integer value that is a reasonably close approximation to $100(t/s)^3$, and all implementations of \TeX\ should use precisely this method. Any badness of $2^{13}$ or more is treated as infinitely bad, and represented by 10000. It is not difficult to prove that $$\hbox{|badness(t+1, s) >= badness(t, s) >= badness(t, s+1)|}.$$ The badness function defined here is capable of computing at most 1095 distinct values, but that is plenty. @d inf_bad 10000 /*infinitely bad value*/ @p halfword badness(scaled @!t, scaled @!s) /*compute badness, given |t >= 0|*/ {@+int r; /*approximation to $\alpha t/s$, where $\alpha^3\approx 100\cdot2^{18}$*/ if (t==0) return 0; else if (s <= 0) return inf_bad; else{@+if (t <= 7230584) r=(t*297)/s; /*$297^3=99.94\times2^{18}$*/ else if (s >= 1663497) r=t/(s/297); else r=t; if (r > 1290) return inf_bad; /*$1290^3<2^{31}<1291^3$*/ else return(r*r*r+0400000)/01000000; } /*that was $r^3/2^{18}$, rounded to the nearest integer*/ } @ When \TeX\ ``packages'' a list into a box, it needs to calculate the proportionality ratio by which the glue inside the box should stretch or shrink. This calculation does not affect \TeX's decision making, so the precise details of rounding, etc., in the glue calculation are not of critical importance for the consistency of results on different computers. We shall use the type |glue_ratio| for such proportionality ratios. A glue ratio should take the same amount of memory as an |int| (usually 32 bits) if it is to blend smoothly with \TeX's other data structures. Thus |glue_ratio| should be equivalent to |short_real| in some implementations of \PASCAL. Alternatively, it is possible to deal with glue ratios using nothing but fixed-point arithmetic; see {\sl TUGboat \bf3},1 (March 1982), 10--27. (But the routines cited there must be modified to allow negative glue ratios.) @^system dependencies@> @d set_glue_ratio_zero(X) X=0.0 /*store the representation of zero ratio*/ @d set_glue_ratio_one(X) X=1.0 /*store the representation of unit ratio*/ @d float(X) ((double)(X)) /*convert from |glue_ratio| to type |double|*/ @d unfloat(X) ((glue_ratio)(X)) /*convert from |double| to type |glue_ratio|*/ @d float_constant(X) ((double)(X)) /*convert |int| constant to |double|*/ @= typedef float32_t @!glue_ratio; /*one-word representation of a glue expansion factor*/ @* Packed data. In order to make efficient use of storage space, \TeX\ bases its major data structures on a |memory_word|, which contains either a (signed) integer, possibly scaled, or a (signed) |glue_ratio|, or a small number of fields that are one half or one quarter of the size used for storing integers. If |x| is a variable of type |memory_word|, it contains up to four fields that can be referred to as follows: $$\vbox{\halign{\hfil#&#\hfil&#\hfil\cr |x|&.|i|&(an |int|)\cr |x|&.|sc|\qquad&(a |scaled| integer)\cr |x|&.|gr|&(a |glue_ratio|)\cr |x.hh.lh|, |x.hh|&.|rh|&(two halfword fields)\cr |x.hh.b0|, |x.hh.b1|, |x.hh|&.|rh|&(two quarterword fields, one halfword field)\cr |x.qqqq.b0|, |x.qqqq.b1|, |x.qqqq|&.|b2|, |x.qqqq.b3|\hskip-100pt &\qquad\qquad\qquad(four quarterword fields)\cr}}$$ This is somewhat cumbersome to write, and not very readable either, but macros will be used to make the notation shorter and more transparent. The \PASCAL\ code below gives a formal definition of |memory_word| and its subsidiary types, using packed variant records. \TeX\ makes no assumptions about the relative positions of the fields within a word. Since we are assuming 32-bit integers, a halfword must contain at least 16 bits, and a quarterword must contain at least 8 bits. @^system dependencies@> But it doesn't hurt to have more bits; for example, with enough 36-bit words you might be able to have |mem_max| as large as 262142, which is eight times as much memory as anybody had during the first four years of \TeX's existence. N.B.: Valuable memory space will be dreadfully wasted unless \TeX\ is compiled by a \PASCAL\ that packs all of the |memory_word| variants into the space of a single integer. This means, for example, that |glue_ratio| words should be |short_real| instead of |double| on some computers. Some \PASCAL\ compilers will pack an integer whose subrange is `|0 dotdot 255|' into an eight-bit field, but others insist on allocating space for an additional sign bit; on such systems you can get 256 values into a quarterword only if the subrange is `|-128 dotdot 127|'. The present implementation tries to accommodate as many variations as possible, so it makes few assumptions. If integers having the subrange `|min_quarterword dotdot max_quarterword|' can be packed into a quarterword, and if integers having the subrange `|min_halfword dotdot max_halfword|' can be packed into a halfword, everything should work satisfactorily. It is usually most efficient to have |min_quarterword==min_halfword==0|, so one should try to achieve this unless it causes a severe problem. The values defined here are recommended for most 32-bit computers. @d min_quarterword 0 /*smallest allowable value in a |quarterword|*/ @d max_quarterword 255 /*largest allowable value in a |quarterword|*/ @d min_halfword 0 /*smallest allowable value in a |halfword|*/ @d max_halfword 65535 /*largest allowable value in a |halfword|*/ @ Here are the inequalities that the quarterword and halfword values must satisfy (or rather, the inequalities that they mustn't satisfy): @= #ifdef @!INIT if ((mem_min!=mem_bot)||(mem_max!=mem_top)) bad=10; #endif @;@/ if ((mem_min > mem_bot)||(mem_max < mem_top)) bad=10; if ((min_quarterword > 0)||(max_quarterword < 127)) bad=11; if ((min_halfword > 0)||(max_halfword < 32767)) bad=12; if ((min_quarterword < min_halfword)||@| (max_quarterword > max_halfword)) bad=13; if ((mem_min < min_halfword)||(mem_max >= max_halfword)||@| (mem_bot-mem_min > max_halfword+1)) bad=14; if ((font_base < min_quarterword)||(font_max > max_quarterword)) bad=15; if (font_max > font_base+256) bad=16; if ((save_size > max_halfword)||(max_strings > max_halfword)) bad=17; if (buf_size > max_halfword) bad=18; if (max_quarterword-min_quarterword < 255) bad=19; @ The operation of adding or subtracting |min_quarterword| occurs quite frequently in \TeX, so it is convenient to abbreviate this operation by using the macros |qi| and |qo| for input and output to and from quarterword format. The inner loop of \TeX\ will run faster with respect to compilers that don't optimize expressions like `|x+0|' and `|x-0|', if these macros are simplified in the obvious way when |min_quarterword==0|. @^inner loop@>@^system dependencies@> @d qi(X) X+min_quarterword /*to put an |eight_bits| item into a quarterword*/ @d qo(X) X-min_quarterword /*to take an |eight_bits| item out of a quarterword*/ @d hi(X) X+min_halfword /*to put a sixteen-bit item into a halfword*/ @d ho(X) X-min_halfword /*to take a sixteen-bit item from a halfword*/ @ The reader should study the following definitions closely: @^system dependencies@> @d sc i /*|scaled| data is equivalent to |int|*/ @= typedef uint8_t quarterword; /*1/4 of a word*/ typedef uint16_t halfword; /*1/2 of a word*/ typedef uint8_t two_choices; /*used when there are two variants in a record*/ typedef uint8_t four_choices; /*used when there are four variants in a record*/ typedef struct { @;@/ halfword @!rh; union { halfword @!lh; struct { quarterword @!b0;quarterword @!b1;} ; };} two_halves; typedef struct { @;@/ quarterword @!b0; quarterword @!b1; quarterword @!b2; quarterword @!b3; } four_quarters; typedef struct { @;@/ union { int @!i; glue_ratio @!gr; two_halves @!hh; four_quarters @!qqqq; };} memory_word; typedef struct {@+FILE *f;@+memory_word@,d;@+} word_file; @ When debugging, we may want to print a |memory_word| without knowing what type it is; so we print it in all modes. @^dirty \PASCAL@>@^debugging@> @p #ifdef @!DEBUG void print_word(memory_word @!w) /*prints |w| in all ways*/ {@+print_int(w.i);print_char(' ');@/ print_scaled(w.sc);print_char(' ');@/ print_scaled(round(unity*float(w.gr)));print_ln();@/ @^real multiplication@> print_int(w.hh.lh);print_char('=');print_int(w.hh.b0);print_char(':'); print_int(w.hh.b1);print_char(';');print_int(w.hh.rh);print_char(' ');@/ print_int(w.qqqq.b0);print_char(':');print_int(w.qqqq.b1);print_char(':'); print_int(w.qqqq.b2);print_char(':');print_int(w.qqqq.b3); } #endif @* Dynamic memory allocation. The \TeX\ system does nearly all of its own memory allocation, so that it can readily be transported into environments that do not have automatic facilities for strings, garbage collection, etc., and so that it can be in control of what error messages the user receives. The dynamic storage requirements of \TeX\ are handled by providing a large array |mem| in which consecutive blocks of words are used as nodes by the \TeX\ routines. Pointer variables are indices into this array, or into another array called |eqtb| that will be explained later. A pointer variable might also be a special flag that lies outside the bounds of |mem|, so we allow pointers to assume any |halfword| value. The minimum halfword value represents a null pointer. \TeX\ does not assume that |mem[null]| exists. @d pointer halfword /*a flag or a location in |mem| or |eqtb|*/ @s pointer int @d null min_halfword /*the null pointer*/ @= pointer @!temp_ptr; /*a pointer variable for occasional emergency use*/ @ The |mem| array is divided into two regions that are allocated separately, but the dividing line between these two regions is not fixed; they grow together until finding their ``natural'' size in a particular job. Locations less than or equal to |lo_mem_max| are used for storing variable-length records consisting of two or more words each. This region is maintained using an algorithm similar to the one described in exercise 2.5--19 of {\sl The Art of Computer Programming}. However, no size field appears in the allocated nodes; the program is responsible for knowing the relevant size when a node is freed. Locations greater than or equal to |hi_mem_min| are used for storing one-word records; a conventional \.{AVAIL} stack is used for allocation in this region. Locations of |mem| between |mem_bot| and |mem_top| may be dumped as part of preloaded format files, by the \.{INITEX} preprocessor. @.INITEX@> Production versions of \TeX\ may extend the memory at both ends in order to provide more space; locations between |mem_min| and |mem_bot| are always used for variable-size nodes, and locations between |mem_top| and |mem_max| are always used for single-word nodes. The key pointers that govern |mem| allocation have a prescribed order: $$\advance\thickmuskip-2mu \hbox{|null <= mem_min <= mem_bot < lo_mem_max < hi_mem_min < mem_top <= mem_end <= mem_max|.}$$ Empirical tests show that the present implementation of \TeX\ tends to spend about 9\pct! of its running time allocating nodes, and about 6\pct! deallocating them after their use. @= memory_word @!mem0[mem_max-mem_min+1], *const @!mem = @!mem0-mem_min; /*the big dynamic storage area*/ pointer @!lo_mem_max; /*the largest location of variable-size memory in use*/ pointer @!hi_mem_min; /*the smallest location of one-word memory in use*/ @ In order to study the memory requirements of particular applications, it is possible to prepare a version of \TeX\ that keeps track of current and maximum memory usage. When code between the delimiters | #ifdef @!STAT | $\ldots$ |tats| is not ``commented out,'' \TeX\ will run a bit slower but it will report these statistics when |tracing_stats| is sufficiently large. @= int @!var_used, @!dyn_used; /*how much memory is in use*/ #ifdef @!DEBUG #define incr_dyn_used @[incr(dyn_used)@] #define decr_dyn_used @[decr(dyn_used)@] #else #define incr_dyn_used #define decr_dyn_used #endif @ Let's consider the one-word memory region first, since it's the simplest. The pointer variable |mem_end| holds the highest-numbered location of |mem| that has ever been used. The free locations of |mem| that occur between |hi_mem_min| and |mem_end|, inclusive, are of type |two_halves|, and we write |info(p)| and |link(p)| for the |lh| and |rh| fields of |mem[p]| when it is of this type. The single-word free locations form a linked list $$|avail|,\;\hbox{|link(avail)|},\;\hbox{|link(link(avail))|},\;\ldots$$ terminated by |null|. @d link(X) mem[X].hh.rh /*the |link| field of a memory word*/ @d info(X) mem[X].hh.lh /*the |info| field of a memory word*/ @= pointer @!avail; /*head of the list of available one-word nodes*/ pointer @!mem_end; /*the last one-word node used in |mem|*/ @ If memory is exhausted, it might mean that the user has forgotten a right brace. We will define some procedures later that try to help pinpoint the trouble. @p@@; @@; @ The function |get_avail| returns a pointer to a new one-word node whose |link| field is null. However, \TeX\ will halt if there is no more room left. @^inner loop@> If the available-space list is empty, i.e., if |avail==null|, we try first to increase |mem_end|. If that cannot be done, i.e., if |mem_end==mem_max|, we try to decrease |hi_mem_min|. If that cannot be done, i.e., if |hi_mem_min==lo_mem_max+1|, we have to quit. @p pointer get_avail(void) /*single-word node allocation*/ {@+pointer p; /*the new node being got*/ p=avail; /*get top location in the |avail| stack*/ if (p!=null) avail=link(avail); /*and pop it off*/ else if (mem_end < mem_max) /*or go into virgin territory*/ {@+incr(mem_end);p=mem_end; } else{@+decr(hi_mem_min);p=hi_mem_min; if (hi_mem_min <= lo_mem_max) {@+runaway(); /*if memory is exhausted, display possible runaway text*/ overflow("main memory size", mem_max+1-mem_min); /*quit; all one-word nodes are busy*/ @:TeX capacity exceeded main memory size}{\quad main memory size@> } } link(p)=null; /*provide an oft-desired initialization of the new node*/ incr_dyn_used; /*maintain statistics*/ return p; } @ Conversely, a one-word node is recycled by calling |free_avail|. This routine is part of \TeX's ``inner loop,'' so we want it to be fast. @^inner loop@> @d free_avail(X) /*single-word node liberation*/ {@+link(X)=avail;avail=X; decr_dyn_used; } @ There's also a |fast_get_avail| routine, which saves the procedure-call overhead at the expense of extra programming. This routine is used in the places that would otherwise account for the most calls of |get_avail|. @^inner loop@> @d fast_get_avail(X) @t@>@;@/ {@+X=avail; /*avoid |get_avail| if possible, to save time*/ if (X==null) X=get_avail(); else{@+avail=link(X);link(X)=null; incr_dyn_used; } } @ The procedure |flush_list(p)| frees an entire linked list of one-word nodes that starts at position |p|. @^inner loop@> @p void flush_list(pointer @!p) /*makes list of single-word nodes available*/ {@+pointer @!q, @!r; /*list traversers*/ if (p!=null) {@+r=p; @/do@+{q=r;r=link(r); decr_dyn_used; }@+ while (!(r==null)); /*now |q| is the last node on the list*/ link(q)=avail;avail=p; } } @ The available-space list that keeps track of the variable-size portion of |mem| is a nonempty, doubly-linked circular list of empty nodes, pointed to by the roving pointer |rover|. Each empty node has size 2 or more; the first word contains the special value |max_halfword| in its |link| field and the size in its |info| field; the second word contains the two pointers for double linking. Each nonempty node also has size 2 or more. Its first word is of type |two_halves|\kern-1pt, and its |link| field is never equal to |max_halfword|. Otherwise there is complete flexibility with respect to the contents of its other fields and its other words. (We require |mem_max < max_halfword| because terrible things can happen when |max_halfword| appears in the |link| field of a nonempty node.) @d empty_flag max_halfword /*the |link| of an empty variable-size node*/ @d is_empty(X) (link(X)==empty_flag) /*tests for empty node*/ @d node_size info /*the size field in empty variable-size nodes*/ @d llink(X) info(X+1) /*left link in doubly-linked list of empty nodes*/ @d rlink(X) link(X+1) /*right link in doubly-linked list of empty nodes*/ @= pointer @!rover; /*points to some node in the list of empties*/ @ A call to |get_node| with argument |s| returns a pointer to a new node of size~|s|, which must be 2~or more. The |link| field of the first word of this new node is set to null. An overflow stop occurs if no suitable space exists. If |get_node| is called with $s=2^{30}$, it simply merges adjacent free areas and returns the value |max_halfword|. @p pointer get_node(int @!s) /*variable-size node allocation*/ {@+ pointer p; /*the node currently under inspection*/ pointer @!q; /*the node physically after node |p|*/ int @!r; /*the newly allocated node, or a candidate for this honor*/ int @!t; /*temporary register*/ restart: p=rover; /*start at some free node in the ring*/ @/do@+{@; @^inner loop@> p=rlink(p); /*move to the next node in the ring*/ }@+ while (!(p==rover)); /*repeat until the whole list has been traversed*/ if (s==010000000000) {@+return max_halfword; } if (lo_mem_max+2 < hi_mem_min) if (lo_mem_max+2 <= mem_bot+max_halfword) @; overflow("main memory size", mem_max+1-mem_min); /*sorry, nothing satisfactory is left*/ @:TeX capacity exceeded main memory size}{\quad main memory size@> found: link(r)=null; /*this node is now nonempty*/ #ifdef @!STAT var_used=var_used+s; /*maintain usage statistics*/ #endif @;@/ return r; } @ The lower part of |mem| grows by 1000 words at a time, unless we are very close to going under. When it grows, we simply link a new node into the available-space list. This method of controlled growth helps to keep the |mem| usage consecutive when \TeX\ is implemented on ``virtual memory'' systems. @^virtual memory@> @= {@+if (hi_mem_min-lo_mem_max >= 1998) t=lo_mem_max+1000; else t=lo_mem_max+1+(hi_mem_min-lo_mem_max)/2; /*|lo_mem_max+2 <= t < hi_mem_min|*/ p=llink(rover);q=lo_mem_max;rlink(p)=q;llink(rover)=q;@/ if (t > mem_bot+max_halfword) t=mem_bot+max_halfword; rlink(q)=rover;llink(q)=p;link(q)=empty_flag;node_size(q)=t-lo_mem_max;@/ lo_mem_max=t;link(lo_mem_max)=null;info(lo_mem_max)=null; rover=q;goto restart; } @ Empirical tests show that the routine in this section performs a node-merging operation about 0.75 times per allocation, on the average, after which it finds that |r > p+1| about 95\pct! of the time. @= q=p+node_size(p); /*find the physical successor*/ @^inner loop@> while (is_empty(q)) /*merge node |p| with node |q|*/ {@+t=rlink(q); if (q==rover) rover=t; llink(t)=llink(q);rlink(llink(q))=t;@/ q=q+node_size(q); } r=q-s; if (r > p+1) @; if (r==p) if (rlink(p)!=p) @; node_size(p)=q-p /*reset the size in case it grew*/ @ @= {@+node_size(p)=r-p; /*store the remaining size*/ @^inner loop@> rover=p; /*start searching here next time*/ goto found; } @ Here we delete node |p| from the ring, and let |rover| rove around. @= {@+rover=rlink(p);t=llink(p); llink(rover)=t;rlink(t)=rover; goto found; } @ Conversely, when some variable-size node |p| of size |s| is no longer needed, the operation |free_node(p, s)| will make its words available, by inserting |p| as a new empty node just before where |rover| now points. @^inner loop@> @p void free_node(pointer @!p, halfword @!s) /*variable-size node liberation*/ {@+pointer q; /*|llink(rover)|*/ node_size(p)=s;link(p)=empty_flag; q=llink(rover);llink(p)=q;rlink(p)=rover; /*set both links*/ llink(rover)=p;rlink(q)=p; /*insert |p| into the ring*/ #ifdef @!STAT var_used=var_used-s; #endif @; /*maintain statistics*/ } @ Just before \.{INITEX} writes out the memory, it sorts the doubly linked available space list. The list is probably very short at such times, so a simple insertion sort is used. The smallest available location will be pointed to by |rover|, the next-smallest by |rlink(rover)|, etc. @p #ifdef @!INIT void sort_avail(void) /*sorts the available variable-size nodes by location*/ {@+pointer p, @!q, @!r; /*indices into |mem|*/ pointer @!old_rover; /*initial |rover| setting*/ p=get_node(010000000000); /*merge adjacent free areas*/ p=rlink(rover);rlink(rover)=max_halfword;old_rover=rover; while (p!=old_rover) @; p=rover; while (rlink(p)!=max_halfword) {@+llink(rlink(p))=p;p=rlink(p); } rlink(p)=rover;llink(rover)=p; } #endif @ The following |while | loop is guaranteed to terminate, since the list that starts at |rover| ends with |max_halfword| during the sorting procedure. @= if (p < rover) {@+q=p;p=rlink(q);rlink(q)=rover;rover=q; } else{@+q=rover; while (rlink(q) < p) q=rlink(q); r=rlink(p);rlink(p)=rlink(q);rlink(q)=p;p=r; } @* Data structures for boxes and their friends. From the computer's standpoint, \TeX's chief mission is to create horizontal and vertical lists. We shall now investigate how the elements of these lists are represented internally as nodes in the dynamic memory. A horizontal or vertical list is linked together by |link| fields in the first word of each node. Individual nodes represent boxes, glue, penalties, or special things like discretionary hyphens; because of this variety, some nodes are longer than others, and we must distinguish different kinds of nodes. We do this by putting a `|type|' field in the first word, together with the link and an optional `|subtype|'. @d type(X) mem[X].hh.b0 /*identifies what kind of node this is*/ @d subtype(X) mem[X].hh.b1 /*secondary identification in some cases*/ @ A |@!char_node|, which represents a single character, is the most important kind of node because it accounts for the vast majority of all boxes. Special precautions are therefore taken to ensure that a |char_node| does not take up much memory space. Every such node is one word long, and in fact it is identifiable by this property, since other kinds of nodes have at least two words, and they appear in |mem| locations less than |hi_mem_min|. This makes it possible to omit the |type| field in a |char_node|, leaving us room for two bytes that identify a |font| and a |character| within that font. Note that the format of a |char_node| allows for up to 256 different fonts and up to 256 characters per font; but most implementations will probably limit the total number of fonts to fewer than 75 per job, and most fonts will stick to characters whose codes are less than 128 (since higher codes are more difficult to access on most keyboards). Extensions of \TeX\ intended for oriental languages will need even more than $256\times256$ possible characters, when we consider different sizes @^oriental characters@>@^Chinese characters@>@^Japanese characters@> and styles of type. It is suggested that Chinese and Japanese fonts be handled by representing such characters in two consecutive |char_node| entries: The first of these has |font==font_base|, and its |link| points to the second; the second identifies the font and the character dimensions. The saving feature about oriental characters is that most of them have the same box dimensions. The |character| field of the first |char_node| is a ``\\{charext}'' that distinguishes between graphic symbols whose dimensions are identical for typesetting purposes. (See the \MF\ manual.) Such an extension of \TeX\ would not be difficult; further details are left to the reader. In order to make sure that the |character| code fits in a quarterword, \TeX\ adds the quantity |min_quarterword| to the actual code. Character nodes appear only in horizontal lists, never in vertical lists. @d is_char_node(X) (X >= hi_mem_min) /*does the argument point to a |char_node|?*/ @d font type /*the font code in a |char_node|*/ @d character subtype /*the character code in a |char_node|*/ @ An |hlist_node| stands for a box that was made from a horizontal list. Each |hlist_node| is seven words long, and contains the following fields (in addition to the mandatory |type| and |link|, which we shall not mention explicitly when discussing the other node types): The |height| and |width| and |depth| are scaled integers denoting the dimensions of the box. There is also a |shift_amount| field, a scaled integer indicating how much this box should be lowered (if it appears in a horizontal list), or how much it should be moved to the right (if it appears in a vertical list). There is a |list_ptr| field, which points to the beginning of the list from which this box was fabricated; if |list_ptr| is |null|, the box is empty. Finally, there are three fields that represent the setting of the glue: |glue_set(p)| is a word of type |glue_ratio| that represents the proportionality constant for glue setting; |glue_sign(p)| is |stretching| or |shrinking| or |normal| depending on whether or not the glue should stretch or shrink or remain rigid; and |glue_order(p)| specifies the order of infinity to which glue setting applies (|normal|, |fil|, |fill|, or |filll|). The |subtype| field is not used. @d hlist_node 0 /*|type| of hlist nodes*/ @d box_node_size 7 /*number of words to allocate for a box node*/ @d width_offset 1 /*position of |width| field in a box node*/ @d depth_offset 2 /*position of |depth| field in a box node*/ @d height_offset 3 /*position of |height| field in a box node*/ @d width(X) mem[X+width_offset].sc /*width of the box, in sp*/ @d depth(X) mem[X+depth_offset].sc /*depth of the box, in sp*/ @d height(X) mem[X+height_offset].sc /*height of the box, in sp*/ @d shift_amount(X) mem[X+4].sc /*repositioning distance, in sp*/ @d list_offset 5 /*position of |list_ptr| field in a box node*/ @d list_ptr(X) link(X+list_offset) /*beginning of the list inside the box*/ @d glue_order(X) subtype(X+list_offset) /*applicable order of infinity*/ @d glue_sign(X) type(X+list_offset) /*stretching or shrinking*/ @d normal 0 /*the most common case when several cases are named*/ @d stretching 1 /*glue setting applies to the stretch components*/ @d shrinking 2 /*glue setting applies to the shrink components*/ @d glue_offset 6 /*position of |glue_set| in a box node*/ @d glue_set(X) mem[X+glue_offset].gr /*a word of type |glue_ratio| for glue setting*/ @ The |new_null_box| function returns a pointer to an |hlist_node| in which all subfields have the values corresponding to `\.{\\hbox\{\}}'. The |subtype| field is set to |min_quarterword|, since that's the desired |span_count| value if this |hlist_node| is changed to an |unset_node|. @p pointer new_null_box(void) /*creates a new box node*/ {@+pointer p; /*the new node*/ p=get_node(box_node_size);type(p)=hlist_node; subtype(p)=min_quarterword; width(p)=0;depth(p)=0;height(p)=0;shift_amount(p)=0;list_ptr(p)=null; glue_sign(p)=normal;glue_order(p)=normal;set_glue_ratio_zero(glue_set(p)); return p; } @ A |vlist_node| is like an |hlist_node| in all respects except that it contains a vertical list. @d vlist_node 1 /*|type| of vlist nodes*/ @ A |rule_node| stands for a solid black rectangle; it has |width|, |depth|, and |height| fields just as in an |hlist_node|. However, if any of these dimensions is $-2^{30}$, the actual value will be determined by running the rule up to the boundary of the innermost enclosing box. This is called a ``running dimension.'' The |width| is never running in an hlist; the |height| and |depth| are never running in a~vlist. @d rule_node 2 /*|type| of rule nodes*/ @d rule_node_size 4 /*number of words to allocate for a rule node*/ @d null_flag -010000000000 /*$-2^{30}$, signifies a missing item*/ @d is_running(X) (X==null_flag) /*tests for a running dimension*/ @ A new rule node is delivered by the |new_rule| function. It makes all the dimensions ``running,'' so you have to change the ones that are not allowed to run. @p pointer new_rule(void) {@+pointer p; /*the new node*/ p=get_node(rule_node_size);type(p)=rule_node; subtype(p)=0; /*the |subtype| is not used*/ width(p)=null_flag;depth(p)=null_flag;height(p)=null_flag; return p; } @ Insertions are represented by |ins_node| records, where the |subtype| indicates the corresponding box number. For example, `\.{\\insert 250}' leads to an |ins_node| whose |subtype| is |250+min_quarterword|. The |height| field of an |ins_node| is slightly misnamed; it actually holds the natural height plus depth of the vertical list being inserted. The |depth| field holds the |split_max_depth| to be used in case this insertion is split, and the |split_top_ptr| points to the corresponding |split_top_skip|. The |float_cost| field holds the |floating_penalty| that will be used if this insertion floats to a subsequent page after a split insertion of the same class. There is one more field, the |ins_ptr|, which points to the beginning of the vlist for the insertion. @d ins_node 3 /*|type| of insertion nodes*/ @d ins_node_size 5 /*number of words to allocate for an insertion*/ @d float_cost(X) mem[X+1].i /*the |floating_penalty| to be used*/ @d ins_ptr(X) info(X+4) /*the vertical list to be inserted*/ @d split_top_ptr(X) link(X+4) /*the |split_top_skip| to be used*/ @ A |mark_node| has a |mark_ptr| field that points to the reference count of a token list that contains the user's \.{\\mark} text. This field occupies a full word instead of a halfword, because there's nothing to put in the other halfword; it is easier in \PASCAL\ to use the full word than to risk leaving garbage in the unused half. @d mark_node 4 /*|type| of a mark node*/ @d small_node_size 2 /*number of words to allocate for most node types*/ @d mark_ptr(X) mem[X+1].i /*head of the token list for a mark*/ @ An |adjust_node|, which occurs only in horizontal lists, specifies material that will be moved out into the surrounding vertical list; i.e., it is used to implement \TeX's `\.{\\vadjust}' operation. The |adjust_ptr| field points to the vlist containing this material. @d adjust_node 5 /*|type| of an adjust node*/ @d adjust_ptr mark_ptr /*vertical list to be moved out of horizontal list*/ @ A |ligature_node|, which occurs only in horizontal lists, specifies a character that was fabricated from the interaction of two or more actual characters. The second word of the node, which is called the |lig_char| word, contains |font| and |character| fields just as in a |char_node|. The characters that generated the ligature have not been forgotten, since they are needed for diagnostic messages and for hyphenation; the |lig_ptr| field points to a linked list of character nodes for all original characters that have been deleted. (This list might be empty if the characters that generated the ligature were retained in other nodes.) The |subtype| field is 0, plus 2 and/or 1 if the original source of the ligature included implicit left and/or right boundaries. @d ligature_node 6 /*|type| of a ligature node*/ @d lig_char(X) X+1 /*the word where the ligature is to be found*/ @d lig_ptr(X) link(lig_char(X)) /*the list of characters*/ @ The |new_ligature| function creates a ligature node having given contents of the |font|, |character|, and |lig_ptr| fields. We also have a |new_lig_item| function, which returns a two-word node having a given |character| field. Such nodes are used for temporary processing as ligatures are being created. @p pointer new_ligature(quarterword @!f, quarterword @!c, pointer @!q) {@+pointer p; /*the new node*/ p=get_node(small_node_size);type(p)=ligature_node; font(lig_char(p))=f;character(lig_char(p))=c;lig_ptr(p)=q; subtype(p)=0;return p; } @# pointer new_lig_item(quarterword @!c) {@+pointer p; /*the new node*/ p=get_node(small_node_size);character(p)=c;lig_ptr(p)=null; return p; } @ A |disc_node|, which occurs only in horizontal lists, specifies a ``dis\-cretion\-ary'' line break. If such a break occurs at node |p|, the text that starts at |pre_break(p)| will precede the break, the text that starts at |post_break(p)| will follow the break, and text that appears in the next |replace_count(p)| nodes will be ignored. For example, an ordinary discretionary hyphen, indicated by `\.{\\-}', yields a |disc_node| with |pre_break| pointing to a |char_node| containing a hyphen, |post_break==null|, and |replace_count==0|. All three of the discretionary texts must be lists that consist entirely of character, kern, box, rule, and ligature nodes. If |pre_break(p)==null|, the |ex_hyphen_penalty| will be charged for this break. Otherwise the |hyphen_penalty| will be charged. The texts will actually be substituted into the list by the line-breaking algorithm if it decides to make the break, and the discretionary node will disappear at that time; thus, the output routine sees only discretionaries that were not chosen. @d disc_node 7 /*|type| of a discretionary node*/ @d replace_count subtype /*how many subsequent nodes to replace*/ @d pre_break llink /*text that precedes a discretionary break*/ @d post_break rlink /*text that follows a discretionary break*/ @p pointer new_disc(void) /*creates an empty |disc_node|*/ {@+pointer p; /*the new node*/ p=get_node(small_node_size);type(p)=disc_node; replace_count(p)=0;pre_break(p)=null;post_break(p)=null; return p; } @ A |whatsit_node| is a wild card reserved for extensions to \TeX. The |subtype| field in its first word says what `\\{whatsit}' it is, and implicitly determines the node size (which must be 2 or more) and the format of the remaining words. When a |whatsit_node| is encountered in a list, special actions are invoked; knowledgeable people who are careful not to mess up the rest of \TeX\ are able to make \TeX\ do new things by adding code at the end of the program. For example, there might be a `\TeX nicolor' extension to specify different colors of ink, @^extensions to \TeX@> and the whatsit node might contain the desired parameters. The present implementation of \TeX\ treats the features associated with `\.{\\write}' and `\.{\\special}' as if they were extensions, in order to illustrate how such routines might be coded. We shall defer further discussion of extensions until the end of this program. @d whatsit_node 8 /*|type| of special extension nodes*/ @ A |math_node|, which occurs only in horizontal lists, appears before and after mathematical formulas. The |subtype| field is |before| before the formula and |after| after it. There is a |width| field, which represents the amount of surrounding space inserted by \.{\\mathsurround}. @d math_node 9 /*|type| of a math node*/ @d before 0 /*|subtype| for math node that introduces a formula*/ @d after 1 /*|subtype| for math node that winds up a formula*/ @p pointer new_math(scaled @!w, small_number @!s) {@+pointer p; /*the new node*/ p=get_node(small_node_size);type(p)=math_node; subtype(p)=s;width(p)=w;return p; } @ \TeX\ makes use of the fact that |hlist_node|, |vlist_node|, |rule_node|, |ins_node|, |mark_node|, |adjust_node|, |ligature_node|, |disc_node|, |whatsit_node|, and |math_node| are at the low end of the type codes, by permitting a break at glue in a list if and only if the |type| of the previous node is less than |math_node|. Furthermore, a node is discarded after a break if its type is |math_node| or~more. @d precedes_break(X) (type(X) < math_node) @d non_discardable(X) (type(X) < math_node) @ A |glue_node| represents glue in a list. However, it is really only a pointer to a separate glue specification, since \TeX\ makes use of the fact that many essentially identical nodes of glue are usually present. If |p| points to a |glue_node|, |glue_ptr(p)| points to another packet of words that specify the stretch and shrink components, etc. Glue nodes also serve to represent leaders; the |subtype| is used to distinguish between ordinary glue (which is called |normal|) and the three kinds of leaders (which are called |a_leaders|, |c_leaders|, and |x_leaders|). The |leader_ptr| field points to a rule node or to a box node containing the leaders; it is set to |null| in ordinary glue nodes. Many kinds of glue are computed from \TeX's ``skip'' parameters, and it is helpful to know which parameter has led to a particular glue node. Therefore the |subtype| is set to indicate the source of glue, whenever it originated as a parameter. We will be defining symbolic names for the parameter numbers later (e.g., |line_skip_code==0|, |baseline_skip_code==1|, etc.); it suffices for now to say that the |subtype| of parametric glue will be the same as the parameter number, plus~one. In math formulas there are two more possibilities for the |subtype| in a glue node: |mu_glue| denotes an \.{\\mskip} (where the units are scaled \.{mu} instead of scaled \.{pt}); and |cond_math_glue| denotes the `\.{\\nonscript}' feature that cancels the glue node immediately following if it appears in a subscript. @d glue_node 10 /*|type| of node that points to a glue specification*/ @d cond_math_glue 98 /*special |subtype| to suppress glue in the next node*/ @d mu_glue 99 /*|subtype| for math glue*/ @d a_leaders 100 /*|subtype| for aligned leaders*/ @d c_leaders 101 /*|subtype| for centered leaders*/ @d x_leaders 102 /*|subtype| for expanded leaders*/ @d glue_ptr llink /*pointer to a glue specification*/ @d leader_ptr rlink /*pointer to box or rule node for leaders*/ @ A glue specification has a halfword reference count in its first word, @^reference counts@> representing |null| plus the number of glue nodes that point to it (less one). Note that the reference count appears in the same position as the |link| field in list nodes; this is the field that is initialized to |null| when a node is allocated, and it is also the field that is flagged by |empty_flag| in empty nodes. Glue specifications also contain three |scaled| fields, for the |width|, |stretch|, and |shrink| dimensions. Finally, there are two one-byte fields called |stretch_order| and |shrink_order|; these contain the orders of infinity (|normal|, |fil|, |fill|, or |filll|) corresponding to the stretch and shrink values. @d glue_spec_size 4 /*number of words to allocate for a glue specification*/ @d glue_ref_count(X) link(X) /*reference count of a glue specification*/ @d stretch(X) mem[X+2].sc /*the stretchability of this glob of glue*/ @d shrink(X) mem[X+3].sc /*the shrinkability of this glob of glue*/ @d stretch_order type /*order of infinity for stretching*/ @d shrink_order subtype /*order of infinity for shrinking*/ @d fil 1 /*first-order infinity*/ @d fill 2 /*second-order infinity*/ @d filll 3 /*third-order infinity*/ @= typedef uint8_t glue_ord; /*infinity to the 0, 1, 2, or 3 power*/ @ Here is a function that returns a pointer to a copy of a glue spec. The reference count in the copy is |null|, because there is assumed to be exactly one reference to the new specification. @p pointer new_spec(pointer @!p) /*duplicates a glue specification*/ {@+pointer q; /*the new spec*/ q=get_node(glue_spec_size);@/ mem[q]=mem[p];glue_ref_count(q)=null;@/ width(q)=width(p);stretch(q)=stretch(p);shrink(q)=shrink(p); return q; } @ And here's a function that creates a glue node for a given parameter identified by its code number; for example, |new_param_glue(line_skip_code)| returns a pointer to a glue node for the current \.{\\lineskip}. @p pointer new_param_glue(small_number @!n) {@+pointer p; /*the new node*/ pointer @!q; /*the glue specification*/ p=get_node(small_node_size);type(p)=glue_node;subtype(p)=n+1; leader_ptr(p)=null;@/ q=@@t@>; glue_ptr(p)=q;incr(glue_ref_count(q)); return p; } @ Glue nodes that are more or less anonymous are created by |new_glue|, whose argument points to a glue specification. @p pointer new_glue(pointer @!q) {@+pointer p; /*the new node*/ p=get_node(small_node_size);type(p)=glue_node;subtype(p)=normal; leader_ptr(p)=null;glue_ptr(p)=q;incr(glue_ref_count(q)); return p; } @ Still another subroutine is needed: This one is sort of a combination of |new_param_glue| and |new_glue|. It creates a glue node for one of the current glue parameters, but it makes a fresh copy of the glue specification, since that specification will probably be subject to change, while the parameter will stay put. The global variable |temp_ptr| is set to the address of the new spec. @p pointer new_skip_param(small_number @!n) {@+pointer p; /*the new node*/ temp_ptr=new_spec(@); p=new_glue(temp_ptr);glue_ref_count(temp_ptr)=null;subtype(p)=n+1; return p; } @ A |kern_node| has a |width| field to specify a (normally negative) amount of spacing. This spacing correction appears in horizontal lists between letters like A and V when the font designer said that it looks better to move them closer together or further apart. A kern node can also appear in a vertical list, when its `|width|' denotes additional spacing in the vertical direction. The |subtype| is either |normal| (for kerns inserted from font information or math mode calculations) or |explicit| (for kerns inserted from \.{\\kern} and \.{\\/} commands) or |acc_kern| (for kerns inserted from non-math accents) or |mu_glue| (for kerns inserted from \.{\\mkern} specifications in math formulas). @d kern_node 11 /*|type| of a kern node*/ @d explicit 1 /*|subtype| of kern nodes from \.{\\kern} and \.{\\/}*/ @d acc_kern 2 /*|subtype| of kern nodes from accents*/ @ The |new_kern| function creates a kern node having a given width. @p pointer new_kern(scaled @!w) {@+pointer p; /*the new node*/ p=get_node(small_node_size);type(p)=kern_node; subtype(p)=normal; width(p)=w; return p; } @ A |penalty_node| specifies the penalty associated with line or page breaking, in its |penalty| field. This field is a fullword integer, but the full range of integer values is not used: Any penalty | >= 10000| is treated as infinity, and no break will be allowed for such high values. Similarly, any penalty | <= -10000| is treated as negative infinity, and a break will be forced. @d penalty_node 12 /*|type| of a penalty node*/ @d inf_penalty inf_bad /*``infinite'' penalty value*/ @d eject_penalty (-inf_penalty) /*``negatively infinite'' penalty value*/ @d penalty(X) mem[X+1].i /*the added cost of breaking a list here*/ @ Anyone who has been reading the last few sections of the program will be able to guess what comes next. @p pointer new_penalty(int @!m) {@+pointer p; /*the new node*/ p=get_node(small_node_size);type(p)=penalty_node; subtype(p)=0; /*the |subtype| is not used*/ penalty(p)=m;return p; } @ You might think that we have introduced enough node types by now. Well, almost, but there is one more: An |unset_node| has nearly the same format as an |hlist_node| or |vlist_node|; it is used for entries in \.{\\halign} or \.{\\valign} that are not yet in their final form, since the box dimensions are their ``natural'' sizes before any glue adjustment has been made. The |glue_set| word is not present; instead, we have a |glue_stretch| field, which contains the total stretch of order |glue_order| that is present in the hlist or vlist being boxed. Similarly, the |shift_amount| field is replaced by a |glue_shrink| field, containing the total shrink of order |glue_sign| that is present. The |subtype| field is called |span_count|; an unset box typically contains the data for |qo(span_count)+1| columns. Unset nodes will be changed to box nodes when alignment is completed. @d unset_node 13 /*|type| for an unset node*/ @d glue_stretch(X) mem[X+glue_offset].sc /*total stretch in an unset node*/ @d glue_shrink shift_amount /*total shrink in an unset node*/ @d span_count subtype /*indicates the number of spanned columns*/ @ In fact, there are still more types coming. When we get to math formula processing we will see that a |style_node| has |type==14|; and a number of larger type codes will also be defined, for use in math mode only. @ Warning: If any changes are made to these data structure layouts, such as changing any of the node sizes or even reordering the words of nodes, the |copy_node_list| procedure and the memory initialization code below may have to be changed. Such potentially dangerous parts of the program are listed in the index under `data structure assumptions'. @!@^data structure assumptions@> However, other references to the nodes are made symbolically in terms of the \.{WEB} macro definitions above, so that format changes will leave \TeX's other algorithms intact. @^system dependencies@> @* Memory layout. Some areas of |mem| are dedicated to fixed usage, since static allocation is more efficient than dynamic allocation when we can get away with it. For example, locations |mem_bot| to |mem_bot+3| are always used to store the specification for glue that is `\.{0pt plus 0pt minus 0pt}'. The following macro definitions accomplish the static allocation by giving symbolic names to the fixed positions. Static variable-size nodes appear in locations |mem_bot| through |lo_mem_stat_max|, and static single-word nodes appear in locations |hi_mem_stat_min| through |mem_top|, inclusive. It is harmless to let |lig_trick| and |garbage| share the same location of |mem|. @d zero_glue mem_bot /*specification for \.{0pt plus 0pt minus 0pt}*/ @d fil_glue zero_glue+glue_spec_size /*\.{0pt plus 1fil minus 0pt}*/ @d fill_glue fil_glue+glue_spec_size /*\.{0pt plus 1fill minus 0pt}*/ @d ss_glue fill_glue+glue_spec_size /*\.{0pt plus 1fil minus 1fil}*/ @d fil_neg_glue ss_glue+glue_spec_size /*\.{0pt plus -1fil minus 0pt}*/ @d lo_mem_stat_max fil_neg_glue+glue_spec_size-1 /*largest statically allocated word in the variable-size |mem|*/ @# @d page_ins_head mem_top /*list of insertion data for current page*/ @d contrib_head mem_top-1 /*vlist of items not yet on current page*/ @d page_head mem_top-2 /*vlist for current page*/ @d temp_head mem_top-3 /*head of a temporary list of some kind*/ @d hold_head mem_top-4 /*head of a temporary list of another kind*/ @d adjust_head mem_top-5 /*head of adjustment list returned by |hpack|*/ @d active mem_top-7 /*head of active list in |line_break|, needs two words*/ @d align_head mem_top-8 /*head of preamble list for alignments*/ @d end_span mem_top-9 /*tail of spanned-width lists*/ @d omit_template mem_top-10 /*a constant token list*/ @d null_list mem_top-11 /*permanently empty list*/ @d lig_trick mem_top-12 /*a ligature masquerading as a |char_node|*/ @d garbage mem_top-12 /*used for scrap information*/ @d backup_head mem_top-13 /*head of token list built by |scan_keyword|*/ @d hi_mem_stat_min mem_top-13 /*smallest statically allocated word in the one-word |mem|*/ @d hi_mem_stat_usage 14 /*the number of one-word nodes always present*/ @ The following code gets |mem| off to a good start, when \TeX\ is initializing itself the slow~way. @= int @!k; /*index into |mem|, |eqtb|, etc.*/ @ @= for (k=mem_bot+1; k<=lo_mem_stat_max; k++) mem[k].sc=0; /*all glue dimensions are zeroed*/ @^data structure assumptions@> k=mem_bot;@+while (k <= lo_mem_stat_max) /*set first words of glue specifications*/ {@+glue_ref_count(k)=null+1; stretch_order(k)=normal;shrink_order(k)=normal; k=k+glue_spec_size; } stretch(fil_glue)=unity;stretch_order(fil_glue)=fil;@/ stretch(fill_glue)=unity;stretch_order(fill_glue)=fill;@/ stretch(ss_glue)=unity;stretch_order(ss_glue)=fil;@/ shrink(ss_glue)=unity;shrink_order(ss_glue)=fil;@/ stretch(fil_neg_glue)=-unity;stretch_order(fil_neg_glue)=fil;@/ rover=lo_mem_stat_max+1; link(rover)=empty_flag; /*now initialize the dynamic memory*/ node_size(rover)=1000; /*which is a 1000-word available node*/ llink(rover)=rover;rlink(rover)=rover;@/ lo_mem_max=rover+1000;link(lo_mem_max)=null;info(lo_mem_max)=null;@/ for (k=hi_mem_stat_min; k<=mem_top; k++) mem[k]=mem[lo_mem_max]; /*clear list heads*/ @; avail=null;mem_end=mem_top; hi_mem_min=hi_mem_stat_min; /*initialize the one-word memory*/ var_used=lo_mem_stat_max+1-mem_bot;dyn_used=hi_mem_stat_usage; /*initialize statistics*/ @ If \TeX\ is extended improperly, the |mem| array might get screwed up. For example, some pointers might be wrong, or some ``dead'' nodes might not have been freed when the last reference to them disappeared. Procedures |check_mem| and |search_mem| are available to help diagnose such problems. These procedures make use of two arrays called |is_free| and |was_free| that are present only if \TeX's debugging routines have been included. (You may want to decrease the size of |mem| while you @^debugging@> are debugging.) @= #ifdef @!DEBUG bool @!is_free0[mem_max-mem_min+1], *const @!is_free = @!is_free0-mem_min; /*free cells*/ @t\hskip10pt@>bool @!was_free0[mem_max-mem_min+1], *const @!was_free = @!was_free0-mem_min; /*previously free cells*/ @t\hskip10pt@>pointer @!was_mem_end, @!was_lo_max, @!was_hi_min; /*previous |mem_end|, |lo_mem_max|, and |hi_mem_min|*/ @t\hskip10pt@>bool @!panicking; /*do we want to check memory constantly?*/ #endif @ @= #ifdef @!DEBUG was_mem_end=mem_min; /*indicate that everything was previously free*/ was_lo_max=mem_min;was_hi_min=mem_max; panicking=false; #endif @ Procedure |check_mem| makes sure that the available space lists of |mem| are well formed, and it optionally prints out all locations that are reserved now but were free the last time this procedure was called. @p #ifdef @!DEBUG void check_mem(bool @!print_locs) {@+ /*loop exits*/ pointer p, @!q; /*current locations of interest in |mem|*/ bool @!clobbered; /*is something amiss?*/ for (p=mem_min; p<=lo_mem_max; p++) is_free[p]=false; /*you can probably do this faster*/ for (p=hi_mem_min; p<=mem_end; p++) is_free[p]=false; /*ditto*/ @; @; @; if (print_locs) @; for (p=mem_min; p<=lo_mem_max; p++) was_free[p]=is_free[p]; for (p=hi_mem_min; p<=mem_end; p++) was_free[p]=is_free[p]; /*|was_free=is_free| might be faster*/ was_mem_end=mem_end;was_lo_max=lo_mem_max;was_hi_min=hi_mem_min; } #endif @ @= p=avail;q=null;clobbered=false; while (p!=null) {@+if ((p > mem_end)||(p < hi_mem_min)) clobbered=true; else if (is_free[p]) clobbered=true; if (clobbered) {@+print_nl("AVAIL list clobbered at "); @.AVAIL list clobbered...@> print_int(q);goto done1; } is_free[p]=true;q=p;p=link(q); } done1: @ @= p=rover;q=null;clobbered=false; @/do@+{if ((p >= lo_mem_max)||(p < mem_min)) clobbered=true; else if ((rlink(p) >= lo_mem_max)||(rlink(p) < mem_min)) clobbered=true; else if (!(is_empty(p))||(node_size(p) < 2)||@| (p+node_size(p) > lo_mem_max)||@|(llink(rlink(p))!=p)) clobbered=true; if (clobbered) {@+print_nl("Double-AVAIL list clobbered at "); print_int(q);goto done2; } for (q=p; q<=p+node_size(p)-1; q++) /*mark all locations free*/ {@+if (is_free[q]) {@+print_nl("Doubly free location at "); @.Doubly free location...@> print_int(q);goto done2; } is_free[q]=true; } q=p;p=rlink(p); }@+ while (!(p==rover)); done2: @ @= p=mem_min; while (p <= lo_mem_max) /*node |p| should not be empty*/ {@+if (is_empty(p)) {@+print_nl("Bad flag at ");print_int(p); @.Bad flag...@> } while ((p <= lo_mem_max)&&!is_free[p]) incr(p); while ((p <= lo_mem_max)&&is_free[p]) incr(p); } @ @= {@+print_nl("New busy locs:"); for (p=mem_min; p<=lo_mem_max; p++) if (!is_free[p]&&((p > was_lo_max)||was_free[p])) {@+print_char(' ');print_int(p); } for (p=hi_mem_min; p<=mem_end; p++) if (!is_free[p]&& ((p < was_hi_min)||(p > was_mem_end)||was_free[p])) {@+print_char(' ');print_int(p); } } @ The |search_mem| procedure attempts to answer the question ``Who points to node~|p|?'' In doing so, it fetches |link| and |info| fields of |mem| that might not be of type |two_halves|. Strictly speaking, this is @^dirty \PASCAL@> undefined in \PASCAL, and it can lead to ``false drops'' (words that seem to point to |p| purely by coincidence). But for debugging purposes, we want to rule out the places that do {\sl not\/} point to |p|, so a few false drops are tolerable. @p #ifdef @!DEBUG void search_mem(pointer @!p) /*look for pointers to |p|*/ {@+int q; /*current position being searched*/ for (q=mem_min; q<=lo_mem_max; q++) {@+if (link(q)==p) {@+print_nl("LINK(");print_int(q);print_char(')'); } if (info(q)==p) {@+print_nl("INFO(");print_int(q);print_char(')'); } } for (q=hi_mem_min; q<=mem_end; q++) {@+if (link(q)==p) {@+print_nl("LINK(");print_int(q);print_char(')'); } if (info(q)==p) {@+print_nl("INFO(");print_int(q);print_char(')'); } } @; @; @; } #endif @* Displaying boxes. We can reinforce our knowledge of the data structures just introduced by considering two procedures that display a list in symbolic form. The first of these, called |short_display|, is used in ``overfull box'' messages to give the top-level description of a list. The other one, called |show_node_list|, prints a detailed description of exactly what is in the data structure. The philosophy of |short_display| is to ignore the fine points about exactly what is inside boxes, except that ligatures and discretionary breaks are expanded. As a result, |short_display| is a recursive procedure, but the recursion is never more than one level deep. @^recursion@> A global variable |font_in_short_display| keeps track of the font code that is assumed to be present when |short_display| begins; deviations from this font will be printed. @= int @!font_in_short_display; /*an internal font number*/ @ Boxes, rules, inserts, whatsits, marks, and things in general that are sort of ``complicated'' are indicated only by printing `\.{[]}'. @p void short_display(int @!p) /*prints highlights of list |p|*/ {@+int n; /*for replacement counts*/ while (p > mem_min) {@+if (is_char_node(p)) {@+if (p <= mem_end) {@+if (font(p)!=font_in_short_display) {@+if ((font(p) < font_base)||(font(p) > font_max)) print_char('*'); @.*\relax@> else@; print_char(' ');font_in_short_display=font(p); } print_ASCII(qo(character(p))); } } else@; p=link(p); } } @ @= switch (type(p)) { case hlist_node: case vlist_node: case ins_node: case whatsit_node: case mark_node: case adjust_node: case unset_node: print_str("[]");@+break; case rule_node: print_char('|');@+break; case glue_node: if (glue_ptr(p)!=zero_glue) print_char(' ');@+break; case math_node: print_char('$');@+break; case ligature_node: short_display(lig_ptr(p));@+break; case disc_node: {@+short_display(pre_break(p)); short_display(post_break(p));@/ n=replace_count(p); while (n > 0) {@+if (link(p)!=null) p=link(p); decr(n); } } @+break; default:do_nothing; } @ The |show_node_list| routine requires some auxiliary subroutines: one to print a font-and-character combination, one to print a token list without its reference count, and one to print a rule dimension. @p void print_font_and_char(int @!p) /*prints |char_node| data*/ {@+if (p > mem_end) print_esc(@[@<|"CLOBBERED."|@>@]); else{@+if ((font(p) < font_base)||(font(p) > font_max)) print_char('*'); @.*\relax@> else@; print_char(' ');print_ASCII(qo(character(p))); } } @# void print_mark(int @!p) /*prints token list data in braces*/ {@+print_char('{'); if ((p < hi_mem_min)||(p > mem_end)) print_esc(@[@<|"CLOBBERED."|@>@]); else show_token_list(link(p), null, max_print_line-10); print_char('}'); } @# void print_rule_dimen(scaled @!d) /*prints dimension in rule node*/ {@+if (is_running(d)) print_char('*');else print_scaled(d); @.*\relax@> } @ Then there is a subroutine that prints glue stretch and shrink, possibly followed by the name of finite units: @p void print_glue(scaled @!d, int @!order, str_number @!s) /*prints a glue component*/ {@+print_scaled(d); if ((order < normal)||(order > filll)) print_str("foul"); else if (order > normal) {@+print_str("fil"); while (order > fil) {@+print_char('l');decr(order); } } else if (s!=0) print(s); } @ The next subroutine prints a whole glue specification. @p void print_spec(int @!p, str_number @!s) /*prints a glue specification*/ {@+if ((p < mem_min)||(p >= lo_mem_max)) print_char('*'); @.*\relax@> else{@+print_scaled(width(p)); if (s!=0) print(s); if (stretch(p)!=0) {@+print_str(" plus ");print_glue(stretch(p), stretch_order(p), s); } if (shrink(p)!=0) {@+print_str(" minus ");print_glue(shrink(p), shrink_order(p), s); } } } @ We also need to declare some procedures that appear later in this documentation. @p@@; @@; @ Since boxes can be inside of boxes, |show_node_list| is inherently recursive, @^recursion@> up to a given maximum number of levels. The history of nesting is indicated by the current string, which will be printed at the beginning of each line; the length of this string, namely |cur_length|, is the depth of nesting. Recursive calls on |show_node_list| therefore use the following pattern: @d node_list_display(X) {@+append_char('.');show_node_list(X);flush_char; } /*|str_room| need not be checked; see |show_box| below*/ @ A global variable called |depth_threshold| is used to record the maximum depth of nesting for which |show_node_list| will show information. If we have |depth_threshold==0|, for example, only the top level information will be given and no sublists will be traversed. Another global variable, called |breadth_max|, tells the maximum number of items to show at each level; |breadth_max| had better be positive, or you won't see anything. @= int @!depth_threshold; /*maximum nesting depth in box displays*/ int @!breadth_max; /*maximum number of items shown at the same list level*/ @ Now we are ready for |show_node_list| itself. This procedure has been written to be ``extra robust'' in the sense that it should not crash or get into a loop even if the data structures have been messed up by bugs in the rest of the program. You can safely call its parent routine |show_box(p)| for arbitrary values of |p| when you are debugging \TeX. However, in the presence of bad data, the procedure may @^dirty \PASCAL@>@^debugging@> fetch a |memory_word| whose variant is different from the way it was stored; for example, it might try to read |mem[p].hh| when |mem[p]| contains a scaled integer, if |p| is a pointer that has been clobbered or chosen at random. @p void show_node_list(int @!p) /*prints a node list symbolically*/ {@+ int n; /*the number of items already printed at this level*/ double @!g; /*a glue ratio, as a floating point number*/ if (cur_length > depth_threshold) {@+if (p > null) print_str(" []"); /*indicate that there's been some truncation*/ return; } n=0; while (p > mem_min) {@+print_ln();print_current_string(); /*display the nesting history*/ if (p > mem_end) /*pointer out of range*/ {@+print_str("Bad link, display aborted.");return; @.Bad link...@> } incr(n);if (n > breadth_max) /*time to stop*/ {@+print_str("etc.");return; @.etc@> } @; p=link(p); } } @ @= if (is_char_node(p)) print_font_and_char(p); else switch (type(p)) { case hlist_node: case vlist_node: case unset_node: @@;@+break; case rule_node: @@;@+break; case ins_node: @@;@+break; case whatsit_node: @@;@+break; case glue_node: @@;@+break; case kern_node: @@;@+break; case math_node: @@;@+break; case ligature_node: @@;@+break; case penalty_node: @@;@+break; case disc_node: @@;@+break; case mark_node: @@;@+break; case adjust_node: @@;@+break; @t\4@>@@; default:print_str("Unknown node type!"); } @ @= {@+if (type(p)==hlist_node) print_esc('h'); else if (type(p)==vlist_node) print_esc('v'); else print_esc(@[@<|"unset"|@>@]); print_str("box(");print_scaled(height(p));print_char('+'); print_scaled(depth(p));print_str(")x");print_scaled(width(p)); if (type(p)==unset_node) @@; else{@+@; if (shift_amount(p)!=0) {@+print_str(", shifted ");print_scaled(shift_amount(p)); } } node_list_display(list_ptr(p)); /*recursive call*/ } @ @= {@+if (span_count(p)!=min_quarterword) {@+print_str(" (");print_int(qo(span_count(p))+1); print_str(" columns)"); } if (glue_stretch(p)!=0) {@+print_str(", stretch ");print_glue(glue_stretch(p), glue_order(p), 0); } if (glue_shrink(p)!=0) {@+print_str(", shrink ");print_glue(glue_shrink(p), glue_sign(p), 0); } } @ The code will have to change in this place if |glue_ratio| is a structured type instead of an ordinary |double|. Note that this routine should avoid arithmetic errors even if the |glue_set| field holds an arbitrary random value. The following code assumes that a properly formed nonzero |double| number has absolute value $2^{20}$ or more when it is regarded as an integer; this precaution was adequate to prevent floating point underflow on the author's computer. @^system dependencies@> @^dirty \PASCAL@> @= g=float(glue_set(p)); if ((g!=float_constant(0))&&(glue_sign(p)!=normal)) {@+print_str(", glue set "); if (glue_sign(p)==shrinking) print_str("- "); if (abs(mem[p+glue_offset].i) < 04000000) print_str("?.?"); else if (abs(g) > float_constant(20000)) {@+if (g > float_constant(0)) print_char('>'); else print_str("< -"); print_glue(20000*unity, glue_order(p), 0); } else print_glue(round(unity*g), glue_order(p), 0); @^real multiplication@> } @ @= {@+print_esc(@[@<|"rule("|@>@]);print_rule_dimen(height(p));print_char('+'); print_rule_dimen(depth(p));print_str(")x");print_rule_dimen(width(p)); } @ @= {@+print_esc(@[@<|"insert"|@>@]);print_int(qo(subtype(p))); print_str(", natural size ");print_scaled(height(p)); print_str("; split(");print_spec(split_top_ptr(p), 0); print_char(',');print_scaled(depth(p)); print_str("); float cost ");print_int(float_cost(p)); node_list_display(ins_ptr(p)); /*recursive call*/ } @ @= if (subtype(p) >= a_leaders) @@; else{@+print_esc(@[@<|"glue"|@>@]); if (subtype(p)!=normal) {@+print_char('('); if (subtype(p) < cond_math_glue) print_skip_param(subtype(p)-1); else if (subtype(p)==cond_math_glue) print_esc(@[@<|"nonscript"|@>@]); else print_esc(@[@<|"mskip"|@>@]); print_char(')'); } if (subtype(p)!=cond_math_glue) {@+print_char(' '); if (subtype(p) < cond_math_glue) print_spec(glue_ptr(p), 0); else print_spec(glue_ptr(p),@[@<|"mu"|@>@]); } } @ @= {@+print_esc(empty_string); if (subtype(p)==c_leaders) print_char('c'); else if (subtype(p)==x_leaders) print_char('x'); print_str("leaders ");print_spec(glue_ptr(p), 0); node_list_display(leader_ptr(p)); /*recursive call*/ } @ An ``explicit'' kern value is indicated implicitly by an explicit space. @= if (subtype(p)!=mu_glue) {@+print_esc(@[@<|"kern"|@>@]); if (subtype(p)!=normal) print_char(' '); print_scaled(width(p)); if (subtype(p)==acc_kern) print_str(" (for accent)"); @.for accent@> } else{@+print_esc(@[@<|"mkern"|@>@]);print_scaled(width(p));print_str("mu"); } @ @= {@+print_esc(@[@<|"math"|@>@]); if (subtype(p)==before) print_str("on"); else print_str("off"); if (width(p)!=0) {@+print_str(", surrounded ");print_scaled(width(p)); } } @ @= {@+print_font_and_char(lig_char(p));print_str(" (ligature "); if (subtype(p) > 1) print_char('|'); font_in_short_display=font(lig_char(p));short_display(lig_ptr(p)); if (odd(subtype(p))) print_char('|'); print_char(')'); } @ @= {@+print_esc(@[@<|"penalty "|@>@]);print_int(penalty(p)); } @ The |post_break| list of a discretionary node is indicated by a prefixed `\.{\char'174}' instead of the `\..' before the |pre_break| list. @= {@+print_esc(@[@<|"discretionary"|@>@]); if (replace_count(p) > 0) {@+print_str(" replacing ");print_int(replace_count(p)); } node_list_display(pre_break(p)); /*recursive call*/ append_char('|');show_node_list(post_break(p));flush_char; /*recursive call*/ } @ @= {@+print_esc(@[@<|"mark"|@>@]);print_mark(mark_ptr(p)); } @ @= {@+print_esc(@[@<|"vadjust"|@>@]);node_list_display(adjust_ptr(p)); /*recursive call*/ } @ The recursive machinery is started by calling |show_box|. @^recursion@> @p void show_box(pointer @!p) {@+@; if (breadth_max <= 0) breadth_max=5; if (pool_ptr+depth_threshold >= pool_size) depth_threshold=pool_size-pool_ptr-1; /*now there's enough room for prefix string*/ show_node_list(p); /*the show starts at |p|*/ print_ln(); } @* Destroying boxes. When we are done with a node list, we are obliged to return it to free storage, including all of its sublists. The recursive procedure |flush_node_list| does this for us. @ First, however, we shall consider two non-recursive procedures that do simpler tasks. The first of these, |delete_token_ref|, is called when a pointer to a token list's reference count is being removed. This means that the token list should disappear if the reference count was |null|, otherwise the count should be decreased by one. @^reference counts@> @d token_ref_count(X) info(X) /*reference count preceding a token list*/ @p void delete_token_ref(pointer @!p) /*|p| points to the reference count of a token list that is losing one reference*/ {@+if (token_ref_count(p)==null) flush_list(p); else decr(token_ref_count(p)); } @ Similarly, |delete_glue_ref| is called when a pointer to a glue specification is being withdrawn. @^reference counts@> @d fast_delete_glue_ref(X) @t@>@;@/ {@+if (glue_ref_count(X)==null) free_node(X, glue_spec_size); else decr(glue_ref_count(X)); } @p void delete_glue_ref(pointer @!p) /*|p| points to a glue specification*/ fast_delete_glue_ref(p) @ Now we are ready to delete any node list, recursively. In practice, the nodes deleted are usually charnodes (about 2/3 of the time), and they are glue nodes in about half of the remaining cases. @^recursion@> @p void flush_node_list(pointer @!p) /*erase list of nodes starting at |p|*/ {@+ /*go here when node |p| has been freed*/ pointer q; /*successor to node |p|*/ while (p!=null) @^inner loop@> {@+q=link(p); if (is_char_node(p)) free_avail(p)@; else{@+switch (type(p)) { case hlist_node: case vlist_node: case unset_node: {@+flush_node_list(list_ptr(p)); free_node(p, box_node_size);goto done; } case rule_node: {@+free_node(p, rule_node_size);goto done; } case ins_node: {@+flush_node_list(ins_ptr(p)); delete_glue_ref(split_top_ptr(p)); free_node(p, ins_node_size);goto done; } case whatsit_node: @@; case glue_node: {@+fast_delete_glue_ref(glue_ptr(p)); if (leader_ptr(p)!=null) flush_node_list(leader_ptr(p)); } @+break; case kern_node: case math_node: case penalty_node: do_nothing;@+break; case ligature_node: flush_node_list(lig_ptr(p));@+break; case mark_node: delete_token_ref(mark_ptr(p));@+break; case disc_node: {@+flush_node_list(pre_break(p)); flush_node_list(post_break(p)); } @+break; case adjust_node: flush_node_list(adjust_ptr(p));@+break; @t\4@>@@; default:confusion(@[@<|"flushing"|@>@]); @:this can't happen flushing}{\quad flushing@> } @/ free_node(p, small_node_size); done: ;} p=q; } } @* Copying boxes. Another recursive operation that acts on boxes is sometimes needed: The procedure |copy_node_list| returns a pointer to another node list that has the same structure and meaning as the original. Note that since glue specifications and token lists have reference counts, we need not make copies of them. Reference counts can never get too large to fit in a halfword, since each pointer to a node is in a different memory address, and the total number of memory addresses fits in a halfword. @^recursion@> @^reference counts@> (Well, there actually are also references from outside |mem|; if the |save_stack| is made arbitrarily large, it would theoretically be possible to break \TeX\ by overflowing a reference count. But who would want to do that?) @d add_token_ref(X) incr(token_ref_count(X)) /*new reference to a token list*/ @d add_glue_ref(X) incr(glue_ref_count(X)) /*new reference to a glue spec*/ @ The copying procedure copies words en masse without bothering to look at their individual fields. If the node format changes---for example, if the size is altered, or if some link field is moved to another relative position---then this code may need to be changed too. @^data structure assumptions@> @p pointer copy_node_list(pointer @!p) /*makes a duplicate of the node list that starts at |p| and returns a pointer to the new list*/ {@+pointer h; /*temporary head of copied list*/ pointer @!q; /*previous position in new list*/ pointer @!r; /*current node being fabricated for new list*/ uint8_t @!words; /*number of words remaining to be copied*/ h=get_avail();q=h; while (p!=null) {@+@; link(q)=r;q=r;p=link(p); } link(q)=null;q=link(h);free_avail(h); return q; } @ @= words=1; /*this setting occurs in more branches than any other*/ if (is_char_node(p)) r=get_avail(); else@; while (words > 0) {@+decr(words);mem[r+words]=mem[p+words]; } @ @= switch (type(p)) { case hlist_node: case vlist_node: case unset_node: {@+r=get_node(box_node_size); mem[r+6]=mem[p+6];mem[r+5]=mem[p+5]; /*copy the last two words*/ list_ptr(r)=copy_node_list(list_ptr(p)); /*this affects |mem[r+5]|*/ words=5; } @+break; case rule_node: {@+r=get_node(rule_node_size);words=rule_node_size; } @+break; case ins_node: {@+r=get_node(ins_node_size);mem[r+4]=mem[p+4]; add_glue_ref(split_top_ptr(p)); ins_ptr(r)=copy_node_list(ins_ptr(p)); /*this affects |mem[r+4]|*/ words=ins_node_size-1; } @+break; case whatsit_node: @@;@+break; case glue_node: {@+r=get_node(small_node_size);add_glue_ref(glue_ptr(p)); glue_ptr(r)=glue_ptr(p);leader_ptr(r)=copy_node_list(leader_ptr(p)); } @+break; case kern_node: case math_node: case penalty_node: {@+r=get_node(small_node_size); words=small_node_size; } @+break; case ligature_node: {@+r=get_node(small_node_size); mem[lig_char(r)]=mem[lig_char(p)]; /*copy |font| and |character|*/ lig_ptr(r)=copy_node_list(lig_ptr(p)); } @+break; case disc_node: {@+r=get_node(small_node_size); pre_break(r)=copy_node_list(pre_break(p)); post_break(r)=copy_node_list(post_break(p)); } @+break; case mark_node: {@+r=get_node(small_node_size);add_token_ref(mark_ptr(p)); words=small_node_size; } @+break; case adjust_node: {@+r=get_node(small_node_size); adjust_ptr(r)=copy_node_list(adjust_ptr(p)); } @+break; /*|words==1==small_node_size-1|*/ default:confusion(@[@<|"copying"|@>@]); @:this can't happen copying}{\quad copying@> } @* The command codes. Before we can go any further, we need to define symbolic names for the internal code numbers that represent the various commands obeyed by \TeX. These codes are somewhat arbitrary, but not completely so. For example, the command codes for character types are fixed by the language, since a user says, e.g., `\.{\\catcode \`\\\${} = 3}' to make \.{\char'44} a math delimiter, and the command code |math_shift| is equal to~3. Some other codes have been made adjacent so that |case| statements in the program need not consider cases that are widely spaced, or so that |case| statements can be replaced by |if (| statements. At any rate, here is the list, for future reference. First come the ``catcode'' commands, several of which share their numeric codes with ordinary commands when the catcode cannot emerge from \TeX's scanning routine. @d escape 0 /*escape delimiter (called \.\\ in {\sl The \TeX book\/})*/ @:TeXbook}{\sl The \TeX book@> @d relax 0 /*do nothing ( \.{\\relax} )*/ @d left_brace 1 /*beginning of a group ( \.\{ )*/ @d right_brace 2 /*ending of a group ( \.\} )*/ @d math_shift 3 /*mathematics shift character ( \.\$ )*/ @d tab_mark 4 /*alignment delimiter ( \.\&, \.{\\span} )*/ @d car_ret 5 /*end of line ( |carriage_return|, \.{\\cr}, \.{\\crcr} )*/ @d out_param 5 /*output a macro parameter*/ @d mac_param 6 /*macro parameter symbol ( \.\# )*/ @d sup_mark 7 /*superscript ( \.{\char'136} )*/ @d sub_mark 8 /*subscript ( \.{\char'137} )*/ @d ignore 9 /*characters to ignore ( \.{\^\^@@} )*/ @d endv 9 /*end of \ list in alignment template*/ @d spacer 10 /*characters equivalent to blank space ( \.{\ } )*/ @d letter 11 /*characters regarded as letters ( \.{A..Z}, \.{a..z} )*/ @d other_char 12 /*none of the special character types*/ @d active_char 13 /*characters that invoke macros ( \.{\char`\~} )*/ @d par_end 13 /*end of paragraph ( \.{\\par} )*/ @d match 13 /*match a macro parameter*/ @d comment 14 /*characters that introduce comments ( \.\% )*/ @d end_match 14 /*end of parameters to macro*/ @d stop 14 /*end of job ( \.{\\end}, \.{\\dump} )*/ @d invalid_char 15 /*characters that shouldn't appear ( \.{\^\^?} )*/ @d delim_num 15 /*specify delimiter numerically ( \.{\\delimiter} )*/ @d max_char_code 15 /*largest catcode for individual characters*/ @ Next are the ordinary run-of-the-mill command codes. Codes that are |min_internal| or more represent internal quantities that might be expanded by `\.{\\the}'. @d char_num 16 /*character specified numerically ( \.{\\char} )*/ @d math_char_num 17 /*explicit math code ( \.{\\mathchar} )*/ @d mark 18 /*mark definition ( \.{\\mark} )*/ @d xray 19 /*peek inside of \TeX\ ( \.{\\show}, \.{\\showbox}, etc.~)*/ @d make_box 20 /*make a box ( \.{\\box}, \.{\\copy}, \.{\\hbox}, etc.~)*/ @d hmove 21 /*horizontal motion ( \.{\\moveleft}, \.{\\moveright} )*/ @d vmove 22 /*vertical motion ( \.{\\raise}, \.{\\lower} )*/ @d un_hbox 23 /*unglue a box ( \.{\\unhbox}, \.{\\unhcopy} )*/ @d un_vbox 24 /*unglue a box ( \.{\\unvbox}, \.{\\unvcopy} )*/ @d remove_item 25 /*nullify last item ( \.{\\unpenalty}, \.{\\unkern}, \.{\\unskip} )*/ @d hskip 26 /*horizontal glue ( \.{\\hskip}, \.{\\hfil}, etc.~)*/ @d vskip 27 /*vertical glue ( \.{\\vskip}, \.{\\vfil}, etc.~)*/ @d mskip 28 /*math glue ( \.{\\mskip} )*/ @d kern 29 /*fixed space ( \.{\\kern})*/ @d mkern 30 /*math kern ( \.{\\mkern} )*/ @d leader_ship 31 /*use a box ( \.{\\shipout}, \.{\\leaders}, etc.~)*/ @d halign 32 /*horizontal table alignment ( \.{\\halign} )*/ @d valign 33 /*vertical table alignment ( \.{\\valign} )*/ @d no_align 34 /*temporary escape from alignment ( \.{\\noalign} )*/ @d vrule 35 /*vertical rule ( \.{\\vrule} )*/ @d hrule 36 /*horizontal rule ( \.{\\hrule} )*/ @d insert 37 /*vlist inserted in box ( \.{\\insert} )*/ @d vadjust 38 /*vlist inserted in enclosing paragraph ( \.{\\vadjust} )*/ @d ignore_spaces 39 /*gobble |spacer| tokens ( \.{\\ignorespaces} )*/ @d after_assignment 40 /*save till assignment is done ( \.{\\afterassignment} )*/ @d after_group 41 /*save till group is done ( \.{\\aftergroup} )*/ @d break_penalty 42 /*additional badness ( \.{\\penalty} )*/ @d start_par 43 /*begin paragraph ( \.{\\indent}, \.{\\noindent} )*/ @d ital_corr 44 /*italic correction ( \.{\\/} )*/ @d accent 45 /*attach accent in text ( \.{\\accent} )*/ @d math_accent 46 /*attach accent in math ( \.{\\mathaccent} )*/ @d discretionary 47 /*discretionary texts ( \.{\\-}, \.{\\discretionary} )*/ @d eq_no 48 /*equation number ( \.{\\eqno}, \.{\\leqno} )*/ @d left_right 49 /*variable delimiter ( \.{\\left}, \.{\\right} )*/ @d math_comp 50 /*component of formula ( \.{\\mathbin}, etc.~)*/ @d limit_switch 51 /*diddle limit conventions ( \.{\\displaylimits}, etc.~)*/ @d above 52 /*generalized fraction ( \.{\\above}, \.{\\atop}, etc.~)*/ @d math_style 53 /*style specification ( \.{\\displaystyle}, etc.~)*/ @d math_choice 54 /*choice specification ( \.{\\mathchoice} )*/ @d non_script 55 /*conditional math glue ( \.{\\nonscript} )*/ @d vcenter 56 /*vertically center a vbox ( \.{\\vcenter} )*/ @d case_shift 57 /*force specific case ( \.{\\lowercase}, \.{\\uppercase}~)*/ @d message 58 /*send to user ( \.{\\message}, \.{\\errmessage} )*/ @d extension 59 /*extensions to \TeX\ ( \.{\\write}, \.{\\special}, etc.~)*/ @d in_stream 60 /*files for reading ( \.{\\openin}, \.{\\closein} )*/ @d begin_group 61 /*begin local grouping ( \.{\\begingroup} )*/ @d end_group 62 /*end local grouping ( \.{\\endgroup} )*/ @d omit 63 /*omit alignment template ( \.{\\omit} )*/ @d ex_space 64 /*explicit space ( \.{\\\ } )*/ @d no_boundary 65 /*suppress boundary ligatures ( \.{\\noboundary} )*/ @d radical 66 /*square root and similar signs ( \.{\\radical} )*/ @d end_cs_name 67 /*end control sequence ( \.{\\endcsname} )*/ @d min_internal 68 /*the smallest code that can follow \.{\\the}*/ @d char_given 68 /*character code defined by \.{\\chardef}*/ @d math_given 69 /*math code defined by \.{\\mathchardef}*/ @d last_item 70 /*most recent item ( \.{\\lastpenalty}, \.{\\lastkern}, \.{\\lastskip} )*/ @d max_non_prefixed_command 70 /*largest command code that can't be \.{\\global}*/ @ The next codes are special; they all relate to mode-independent assignment of values to \TeX's internal registers or tables. Codes that are |max_internal| or less represent internal quantities that might be expanded by `\.{\\the}'. @d toks_register 71 /*token list register ( \.{\\toks} )*/ @d assign_toks 72 /*special token list ( \.{\\output}, \.{\\everypar}, etc.~)*/ @d assign_int 73 /*user-defined integer ( \.{\\tolerance}, \.{\\day}, etc.~)*/ @d assign_dimen 74 /*user-defined length ( \.{\\hsize}, etc.~)*/ @d assign_glue 75 /*user-defined glue ( \.{\\baselineskip}, etc.~)*/ @d assign_mu_glue 76 /*user-defined muglue ( \.{\\thinmuskip}, etc.~)*/ @d assign_font_dimen 77 /*user-defined font dimension ( \.{\\fontdimen} )*/ @d assign_font_int 78 /*user-defined font integer ( \.{\\hyphenchar}, \.{\\skewchar} )*/ @d set_aux 79 /*specify state info ( \.{\\spacefactor}, \.{\\prevdepth} )*/ @d set_prev_graf 80 /*specify state info ( \.{\\prevgraf} )*/ @d set_page_dimen 81 /*specify state info ( \.{\\pagegoal}, etc.~)*/ @d set_page_int 82 /*specify state info ( \.{\\deadcycles}, \.{\\insertpenalties} )*/ @d set_box_dimen 83 /*change dimension of box ( \.{\\wd}, \.{\\ht}, \.{\\dp} )*/ @d set_shape 84 /*specify fancy paragraph shape ( \.{\\parshape} )*/ @d def_code 85 /*define a character code ( \.{\\catcode}, etc.~)*/ @d def_family 86 /*declare math fonts ( \.{\\textfont}, etc.~)*/ @d set_font 87 /*set current font ( font identifiers )*/ @d def_font 88 /*define a font file ( \.{\\font} )*/ @d register 89 /*internal register ( \.{\\count}, \.{\\dimen}, etc.~)*/ @d max_internal 89 /*the largest code that can follow \.{\\the}*/ @d advance 90 /*advance a register or parameter ( \.{\\advance} )*/ @d multiply 91 /*multiply a register or parameter ( \.{\\multiply} )*/ @d divide 92 /*divide a register or parameter ( \.{\\divide} )*/ @d prefix 93 /*qualify a definition ( \.{\\global}, \.{\\long}, \.{\\outer} )*/ @d let 94 /*assign a command code ( \.{\\let}, \.{\\futurelet} )*/ @d shorthand_def 95 /*code definition ( \.{\\chardef}, \.{\\countdef}, etc.~)*/ @d read_to_cs 96 /*read into a control sequence ( \.{\\read} )*/ @d def 97 /*macro definition ( \.{\\def}, \.{\\gdef}, \.{\\xdef}, \.{\\edef} )*/ @d set_box 98 /*set a box ( \.{\\setbox} )*/ @d hyph_data 99 /*hyphenation data ( \.{\\hyphenation}, \.{\\patterns} )*/ @d set_interaction 100 /*define level of interaction ( \.{\\batchmode}, etc.~)*/ @d max_command 100 /*the largest command code seen at |big_switch|*/ @ The remaining command codes are extra special, since they cannot get through \TeX's scanner to the main control routine. They have been given values higher than |max_command| so that their special nature is easily discernible. The ``expandable'' commands come first. @d undefined_cs (max_command+1) /*initial state of most |eq_type| fields*/ @d expand_after (max_command+2) /*special expansion ( \.{\\expandafter} )*/ @d no_expand (max_command+3) /*special nonexpansion ( \.{\\noexpand} )*/ @d input (max_command+4) /*input a source file ( \.{\\input}, \.{\\endinput} )*/ @d if_test (max_command+5) /*conditional text ( \.{\\if}, \.{\\ifcase}, etc.~)*/ @d fi_or_else (max_command+6) /*delimiters for conditionals ( \.{\\else}, etc.~)*/ @d cs_name (max_command+7) /*make a control sequence from tokens ( \.{\\csname} )*/ @d convert (max_command+8) /*convert to text ( \.{\\number}, \.{\\string}, etc.~)*/ @d the (max_command+9) /*expand an internal quantity ( \.{\\the} )*/ @d top_bot_mark (max_command+10) /*inserted mark ( \.{\\topmark}, etc.~)*/ @d call (max_command+11) /*non-long, non-outer control sequence*/ @d long_call (max_command+12) /*long, non-outer control sequence*/ @d outer_call (max_command+13) /*non-long, outer control sequence*/ @d long_outer_call (max_command+14) /*long, outer control sequence*/ @d end_template (max_command+15) /*end of an alignment template*/ @d dont_expand (max_command+16) /*the following token was marked by \.{\\noexpand}*/ @d glue_ref (max_command+17) /*the equivalent points to a glue specification*/ @d shape_ref (max_command+18) /*the equivalent points to a parshape specification*/ @d box_ref (max_command+19) /*the equivalent points to a box node, or is |null|*/ @d data (max_command+20) /*the equivalent is simply a halfword number*/ @* The semantic nest. \TeX\ is typically in the midst of building many lists at once. For example, when a math formula is being processed, \TeX\ is in math mode and working on an mlist; this formula has temporarily interrupted \TeX\ from being in horizontal mode and building the hlist of a paragraph; and this paragraph has temporarily interrupted \TeX\ from being in vertical mode and building the vlist for the next page of a document. Similarly, when a \.{\\vbox} occurs inside of an \.{\\hbox}, \TeX\ is temporarily interrupted from working in restricted horizontal mode, and it enters internal vertical mode. The ``semantic nest'' is a stack that keeps track of what lists and modes are currently suspended. At each level of processing we are in one of six modes: \yskip\hang|vmode| stands for vertical mode (the page builder); \hang|hmode| stands for horizontal mode (the paragraph builder); \hang|mmode| stands for displayed formula mode; \hang|-vmode| stands for internal vertical mode (e.g., in a \.{\\vbox}); \hang|-hmode| stands for restricted horizontal mode (e.g., in an \.{\\hbox}); \hang|-mmode| stands for math formula mode (not displayed). \yskip\noindent The mode is temporarily set to zero while processing \.{\\write} texts in the |ship_out| routine. Numeric values are assigned to |vmode|, |hmode|, and |mmode| so that \TeX's ``big semantic switch'' can select the appropriate thing to do by computing the value |abs(mode)+cur_cmd|, where |mode| is the current mode and |cur_cmd| is the current command code. @d vmode 1 /*vertical mode*/ @d hmode (vmode+max_command+1) /*horizontal mode*/ @d mmode (hmode+max_command+1) /*math mode*/ @p void print_mode(int @!m) /*prints the mode represented by |m|*/ {@+if (m > 0) switch (m/(max_command+1)) { case 0: print_str("vertical");@+break; case 1: print_str("horizontal");@+break; case 2: print_str("display math"); } else if (m==0) print_str("no"); else switch ((-m)/(max_command+1)) { case 0: print_str("internal vertical");@+break; case 1: print_str("restricted horizontal");@+break; case 2: print_str("math"); } print_str(" mode"); } @ The state of affairs at any semantic level can be represented by five values: \yskip\hang|mode| is the number representing the semantic mode, as just explained. \yskip\hang|head| is a |pointer| to a list head for the list being built; |link(head)| therefore points to the first element of the list, or to |null| if the list is empty. \yskip\hang|tail| is a |pointer| to the final node of the list being built; thus, |tail==head| if and only if the list is empty. \yskip\hang|prev_graf| is the number of lines of the current paragraph that have already been put into the present vertical list. \yskip\hang|aux| is an auxiliary |memory_word| that gives further information that is needed to characterize the situation. \yskip\noindent In vertical mode, |aux| is also known as |prev_depth|; it is the scaled value representing the depth of the previous box, for use in baseline calculations, or it is | <= -1000|pt if the next box on the vertical list is to be exempt from baseline calculations. In horizontal mode, |aux| is also known as |space_factor| and |clang|; it holds the current space factor used in spacing calculations, and the current language used for hyphenation. (The value of |clang| is undefined in restricted horizontal mode.) In math mode, |aux| is also known as |incompleat_noad|; if not |null|, it points to a record that represents the numerator of a generalized fraction for which the denominator is currently being formed in the current list. There is also a sixth quantity, |mode_line|, which correlates the semantic nest with the user's input; |mode_line| contains the source line number at which the current level of nesting was entered. The negative of this line number is the |mode_line| at the level of the user's output routine. In horizontal mode, the |prev_graf| field is used for initial language data. The semantic nest is an array called |nest| that holds the |mode|, |head|, |tail|, |prev_graf|, |aux|, and |mode_line| values for all semantic levels below the currently active one. Information about the currently active level is kept in the global quantities |mode|, |head|, |tail|, |prev_graf|, |aux|, and |mode_line|, which live in a \PASCAL\ record that is ready to be pushed onto |nest| if necessary. @d ignore_depth -65536000 /*|prev_depth| value that is ignored*/ @= typedef struct { int16_t @!mode_field;@+ pointer @!head_field, @!tail_field; int @!pg_field, @!ml_field;@+ memory_word @!aux_field; } list_state_record; @ @d mode cur_list.mode_field /*current mode*/ @d head cur_list.head_field /*header node of current list*/ @d tail cur_list.tail_field /*final node on current list*/ @d prev_graf cur_list.pg_field /*number of paragraph lines accumulated*/ @d aux cur_list.aux_field /*auxiliary data about the current list*/ @d prev_depth aux.sc /*the name of |aux| in vertical mode*/ @d space_factor aux.hh.lh /*part of |aux| in horizontal mode*/ @d clang aux.hh.rh /*the other part of |aux| in horizontal mode*/ @d incompleat_noad aux.i /*the name of |aux| in math mode*/ @d mode_line cur_list.ml_field /*source file line number at beginning of list*/ @= list_state_record @!nest[nest_size+1]; uint8_t @!nest_ptr; /*first unused location of |nest|*/ uint8_t @!max_nest_stack; /*maximum of |nest_ptr| when pushing*/ list_state_record @!cur_list; /*the ``top'' semantic state*/ int16_t @!shown_mode; /*most recent mode shown by \.{\\tracingcommands}*/ @ Here is a common way to make the current list grow: @d tail_append(X) {@+link(tail)=X;tail=link(tail); } @ We will see later that the vertical list at the bottom semantic level is split into two parts; the ``current page'' runs from |page_head| to |page_tail|, and the ``contribution list'' runs from |contrib_head| to |tail| of semantic level zero. The idea is that contributions are first formed in vertical mode, then ``contributed'' to the current page (during which time the page-breaking decisions are made). For now, we don't need to know any more details about the page-building process. @= nest_ptr=0;max_nest_stack=0; mode=vmode;head=contrib_head;tail=contrib_head; prev_depth=ignore_depth;mode_line=0; prev_graf=0;shown_mode=0; @; @ When \TeX's work on one level is interrupted, the state is saved by calling |push_nest|. This routine changes |head| and |tail| so that a new (empty) list is begun; it does not change |mode| or |aux|. @p void push_nest(void) /*enter a new semantic level, save the old*/ {@+if (nest_ptr > max_nest_stack) {@+max_nest_stack=nest_ptr; if (nest_ptr==nest_size) overflow("semantic nest size", nest_size); @:TeX capacity exceeded semantic nest size}{\quad semantic nest size@> } nest[nest_ptr]=cur_list; /*stack the record*/ incr(nest_ptr);head=get_avail();tail=head;prev_graf=0;mode_line=line; } @ Conversely, when \TeX\ is finished on the current level, the former state is restored by calling |pop_nest|. This routine will never be called at the lowest semantic level, nor will it be called unless |head| is a node that should be returned to free memory. @p void pop_nest(void) /*leave a semantic level, re-enter the old*/ {@+free_avail(head);decr(nest_ptr);cur_list=nest[nest_ptr]; } @ Here is a procedure that displays what \TeX\ is working on, at all levels. @p void print_totals(void); void show_activities(void) {@+int p; /*index into |nest|*/ int16_t @!m; /*mode*/ memory_word @!a; /*auxiliary*/ pointer @!q, @!r; /*for showing the current page*/ int @!t; /*ditto*/ nest[nest_ptr]=cur_list; /*put the top level into the array*/ print_nl("");print_ln(); for (p=nest_ptr; p>=0; p--) {@+m=nest[p].mode_field;a=nest[p].aux_field; print_nl("### ");print_mode(m); print_str(" entered at line ");print_int(abs(nest[p].ml_field)); if (m==hmode) if (nest[p].pg_field!=040600000) {@+print_str(" (language");print_int(nest[p].pg_field%0200000); print_str(":hyphenmin");print_int(nest[p].pg_field/020000000); print_char(',');print_int((nest[p].pg_field/0200000)%0100); print_char(')'); } if (nest[p].ml_field < 0) print_str(" (\\output routine)"); if (p==0) {@+@; if (link(contrib_head)!=null) print_nl("### recent contributions:"); } show_box(link(nest[p].head_field)); @; } } @ @= switch (abs(m)/(max_command+1)) { case 0: {@+print_nl("prevdepth "); if (a.sc <= ignore_depth) print_str("ignored"); else print_scaled(a.sc); if (nest[p].pg_field!=0) {@+print_str(", prevgraf "); print_int(nest[p].pg_field);print_str(" line"); if (nest[p].pg_field!=1) print_char('s'); } } @+break; case 1: {@+print_nl("spacefactor ");print_int(a.hh.lh); if (m > 0) @+if (a.hh.rh > 0) {@+print_str(", current language ");print_int(a.hh.rh);@+ } } @+break; case 2: if (a.i!=null) {@+print_str("this will be denominator of:");show_box(a.i);@+ } } /*there are no other cases*/ @* The table of equivalents. Now that we have studied the data structures for \TeX's semantic routines, we ought to consider the data structures used by its syntactic routines. In other words, our next concern will be the tables that \TeX\ looks at when it is scanning what the user has written. The biggest and most important such table is called |eqtb|. It holds the current ``equivalents'' of things; i.e., it explains what things mean or what their current values are, for all quantities that are subject to the nesting structure provided by \TeX's grouping mechanism. There are six parts to |eqtb|: \yskip\hangg 1) |eqtb[active_base dotdot(hash_base-1)]| holds the current equivalents of single-character control sequences. \yskip\hangg 2) |eqtb[hash_base dotdot(glue_base-1)]| holds the current equivalents of multiletter control sequences. \yskip\hangg 3) |eqtb[glue_base dotdot(local_base-1)]| holds the current equivalents of glue parameters like the current baselineskip. \yskip\hangg 4) |eqtb[local_base dotdot(int_base-1)]| holds the current equivalents of local halfword quantities like the current box registers, the current ``catcodes,'' the current font, and a pointer to the current paragraph shape. \yskip\hangg 5) |eqtb[int_base dotdot(dimen_base-1)]| holds the current equivalents of fullword integer parameters like the current hyphenation penalty. \yskip\hangg 6) |eqtb[dimen_base dotdot eqtb_size]| holds the current equivalents of fullword dimension parameters like the current hsize or amount of hanging indentation. \yskip\noindent Note that, for example, the current amount of baselineskip glue is determined by the setting of a particular location in region~3 of |eqtb|, while the current meaning of the control sequence `\.{\\baselineskip}' (which might have been changed by \.{\\def} or \.{\\let}) appears in region~2. @ Each entry in |eqtb| is a |memory_word|. Most of these words are of type |two_halves|, and subdivided into three fields: \yskip\hangg 1) The |eq_level| (a quarterword) is the level of grouping at which this equivalent was defined. If the level is |level_zero|, the equivalent has never been defined; |level_one| refers to the outer level (outside of all groups), and this level is also used for global definitions that never go away. Higher levels are for equivalents that will disappear at the end of their group. @^global definitions@> \yskip\hangg 2) The |eq_type| (another quarterword) specifies what kind of entry this is. There are many types, since each \TeX\ primitive like \.{\\hbox}, \.{\\def}, etc., has its own special code. The list of command codes above includes all possible settings of the |eq_type| field. \yskip\hangg 3) The |equiv| (a halfword) is the current equivalent value. This may be a font number, a pointer into |mem|, or a variety of other things. @d eq_level_field(X) X.hh.b1 @d eq_type_field(X) X.hh.b0 @d equiv_field(X) X.hh.rh @d eq_level(X) eq_level_field(eqtb[X]) /*level of definition*/ @d eq_type(X) eq_type_field(eqtb[X]) /*command code for equivalent*/ @d equiv(X) equiv_field(eqtb[X]) /*equivalent value*/ @d level_zero min_quarterword /*level for undefined quantities*/ @d level_one (level_zero+1) /*outermost level for defined quantities*/ @ Many locations in |eqtb| have symbolic names. The purpose of the next paragraphs is to define these names, and to set up the initial values of the equivalents. In the first region we have 256 equivalents for ``active characters'' that act as control sequences, followed by 256 equivalents for single-character control sequences. Then comes region~2, which corresponds to the hash table that we will define later. The maximum address in this region is used for a dummy control sequence that is perpetually undefined. There also are several locations for control sequences that are perpetually defined (since they are used in error recovery). @d active_base 1 /*beginning of region 1, for active character equivalents*/ @d single_base (active_base+256) /*equivalents of one-character control sequences*/ @d null_cs (single_base+256) /*equivalent of \.{\\csname\\endcsname}*/ @d hash_base (null_cs+1) /*beginning of region 2, for the hash table*/ @d frozen_control_sequence (hash_base+hash_size) /*for error recovery*/ @d frozen_protection frozen_control_sequence /*inaccessible but definable*/ @d frozen_cr (frozen_control_sequence+1) /*permanent `\.{\\cr}'*/ @d frozen_end_group (frozen_control_sequence+2) /*permanent `\.{\\endgroup}'*/ @d frozen_right (frozen_control_sequence+3) /*permanent `\.{\\right}'*/ @d frozen_fi (frozen_control_sequence+4) /*permanent `\.{\\fi}'*/ @d frozen_end_template (frozen_control_sequence+5) /*permanent `\.{\\endtemplate}'*/ @d frozen_endv (frozen_control_sequence+6) /*second permanent `\.{\\endtemplate}'*/ @d frozen_relax (frozen_control_sequence+7) /*permanent `\.{\\relax}'*/ @d end_write (frozen_control_sequence+8) /*permanent `\.{\\endwrite}'*/ @d frozen_dont_expand (frozen_control_sequence+9) /*permanent `\.{\\notexpanded:}'*/ @d frozen_null_font (frozen_control_sequence+10) /*permanent `\.{\\nullfont}'*/ @d font_id_base (frozen_null_font-font_base) /*begins table of 257 permanent font identifiers*/ @d undefined_control_sequence (frozen_null_font+257) /*dummy location*/ @d glue_base (undefined_control_sequence+1) /*beginning of region 3*/ @= eq_type(undefined_control_sequence)=undefined_cs; equiv(undefined_control_sequence)=null; eq_level(undefined_control_sequence)=level_zero; for (k=active_base; k<=undefined_control_sequence-1; k++) eqtb[k]=eqtb[undefined_control_sequence]; @ Here is a routine that displays the current meaning of an |eqtb| entry in region 1 or~2. (Similar routines for the other regions will appear below.) @= {@+sprint_cs(n);print_char('=');print_cmd_chr(eq_type(n), equiv(n)); if (eq_type(n) >= call) {@+print_char(':');show_token_list(link(equiv(n)), null, 32); } } @ Region 3 of |eqtb| contains the 256 \.{\\skip} registers, as well as the glue parameters defined here. It is important that the ``muskip'' parameters have larger numbers than the others. @d line_skip_code 0 /*interline glue if |baseline_skip| is infeasible*/ @d baseline_skip_code 1 /*desired glue between baselines*/ @d par_skip_code 2 /*extra glue just above a paragraph*/ @d above_display_skip_code 3 /*extra glue just above displayed math*/ @d below_display_skip_code 4 /*extra glue just below displayed math*/ @d above_display_short_skip_code 5 /*glue above displayed math following short lines*/ @d below_display_short_skip_code 6 /*glue below displayed math following short lines*/ @d left_skip_code 7 /*glue at left of justified lines*/ @d right_skip_code 8 /*glue at right of justified lines*/ @d top_skip_code 9 /*glue at top of main pages*/ @d split_top_skip_code 10 /*glue at top of split pages*/ @d tab_skip_code 11 /*glue between aligned entries*/ @d space_skip_code 12 /*glue between words (if not |zero_glue|)*/ @d xspace_skip_code 13 /*glue after sentences (if not |zero_glue|)*/ @d par_fill_skip_code 14 /*glue on last line of paragraph*/ @d thin_mu_skip_code 15 /*thin space in math formula*/ @d med_mu_skip_code 16 /*medium space in math formula*/ @d thick_mu_skip_code 17 /*thick space in math formula*/ @d glue_pars 18 /*total number of glue parameters*/ @d skip_base (glue_base+glue_pars) /*table of 256 ``skip'' registers*/ @d mu_skip_base (skip_base+256) /*table of 256 ``muskip'' registers*/ @d local_base (mu_skip_base+256) /*beginning of region 4*/ @# @d skip(X) equiv(skip_base+X) /*|mem| location of glue specification*/ @d mu_skip(X) equiv(mu_skip_base+X) /*|mem| location of math glue spec*/ @d glue_par(X) equiv(glue_base+X) /*|mem| location of glue specification*/ @d line_skip glue_par(line_skip_code) @d baseline_skip glue_par(baseline_skip_code) @d par_skip glue_par(par_skip_code) @d above_display_skip glue_par(above_display_skip_code) @d below_display_skip glue_par(below_display_skip_code) @d above_display_short_skip glue_par(above_display_short_skip_code) @d below_display_short_skip glue_par(below_display_short_skip_code) @d left_skip glue_par(left_skip_code) @d right_skip glue_par(right_skip_code) @d top_skip glue_par(top_skip_code) @d split_top_skip glue_par(split_top_skip_code) @d tab_skip glue_par(tab_skip_code) @d space_skip glue_par(space_skip_code) @d xspace_skip glue_par(xspace_skip_code) @d par_fill_skip glue_par(par_fill_skip_code) @d thin_mu_skip glue_par(thin_mu_skip_code) @d med_mu_skip glue_par(med_mu_skip_code) @d thick_mu_skip glue_par(thick_mu_skip_code) @=glue_par(n) @ Sometimes we need to convert \TeX's internal code numbers into symbolic form. The |print_skip_param| routine gives the symbolic name of a glue parameter. @= void print_skip_param(int @!n) {@+switch (n) { case line_skip_code: print_esc(@[@<|"lineskip"|@>@]);@+break; case baseline_skip_code: print_esc(@[@<|"baselineskip"|@>@]);@+break; case par_skip_code: print_esc(@[@<|"parskip"|@>@]);@+break; case above_display_skip_code: print_esc(@[@<|"abovedisplayskip"|@>@]);@+break; case below_display_skip_code: print_esc(@[@<|"belowdisplayskip"|@>@]);@+break; case above_display_short_skip_code: print_esc(@[@<|"abovedisplayshortskip"|@>@]);@+break; case below_display_short_skip_code: print_esc(@[@<|"belowdisplayshortskip"|@>@]);@+break; case left_skip_code: print_esc(@[@<|"leftskip"|@>@]);@+break; case right_skip_code: print_esc(@[@<|"rightskip"|@>@]);@+break; case top_skip_code: print_esc(@[@<|"topskip"|@>@]);@+break; case split_top_skip_code: print_esc(@[@<|"splittopskip"|@>@]);@+break; case tab_skip_code: print_esc(@[@<|"tabskip"|@>@]);@+break; case space_skip_code: print_esc(@[@<|"spaceskip"|@>@]);@+break; case xspace_skip_code: print_esc(@[@<|"xspaceskip"|@>@]);@+break; case par_fill_skip_code: print_esc(@[@<|"parfillskip"|@>@]);@+break; case thin_mu_skip_code: print_esc(@[@<|"thinmuskip"|@>@]);@+break; case med_mu_skip_code: print_esc(@[@<|"medmuskip"|@>@]);@+break; case thick_mu_skip_code: print_esc(@[@<|"thickmuskip"|@>@]);@+break; default:print_str("[unknown glue parameter!]"); } } @ The symbolic names for glue parameters are put into \TeX's hash table by using the routine called |primitive|, defined below. Let us enter them now, so that we don't have to list all those parameter names anywhere else. @= primitive(@[@<|"lineskip"|@>@], assign_glue, glue_base+line_skip_code);@/ @!@:line_skip_}{\.{\\lineskip} primitive@> primitive(@[@<|"baselineskip"|@>@], assign_glue, glue_base+baseline_skip_code);@/ @!@:baseline_skip_}{\.{\\baselineskip} primitive@> primitive(@[@<|"parskip"|@>@], assign_glue, glue_base+par_skip_code);@/ @!@:par_skip_}{\.{\\parskip} primitive@> primitive(@[@<|"abovedisplayskip"|@>@], assign_glue, glue_base+above_display_skip_code);@/ @!@:above_display_skip_}{\.{\\abovedisplayskip} primitive@> primitive(@[@<|"belowdisplayskip"|@>@], assign_glue, glue_base+below_display_skip_code);@/ @!@:below_display_skip_}{\.{\\belowdisplayskip} primitive@> primitive(@[@<|"abovedisplayshortskip"|@>@], assign_glue, glue_base+above_display_short_skip_code);@/ @!@:above_display_short_skip_}{\.{\\abovedisplayshortskip} primitive@> primitive(@[@<|"belowdisplayshortskip"|@>@], assign_glue, glue_base+below_display_short_skip_code);@/ @!@:below_display_short_skip_}{\.{\\belowdisplayshortskip} primitive@> primitive(@[@<|"leftskip"|@>@], assign_glue, glue_base+left_skip_code);@/ @!@:left_skip_}{\.{\\leftskip} primitive@> primitive(@[@<|"rightskip"|@>@], assign_glue, glue_base+right_skip_code);@/ @!@:right_skip_}{\.{\\rightskip} primitive@> primitive(@[@<|"topskip"|@>@], assign_glue, glue_base+top_skip_code);@/ @!@:top_skip_}{\.{\\topskip} primitive@> primitive(@[@<|"splittopskip"|@>@], assign_glue, glue_base+split_top_skip_code);@/ @!@:split_top_skip_}{\.{\\splittopskip} primitive@> primitive(@[@<|"tabskip"|@>@], assign_glue, glue_base+tab_skip_code);@/ @!@:tab_skip_}{\.{\\tabskip} primitive@> primitive(@[@<|"spaceskip"|@>@], assign_glue, glue_base+space_skip_code);@/ @!@:space_skip_}{\.{\\spaceskip} primitive@> primitive(@[@<|"xspaceskip"|@>@], assign_glue, glue_base+xspace_skip_code);@/ @!@:xspace_skip_}{\.{\\xspaceskip} primitive@> primitive(@[@<|"parfillskip"|@>@], assign_glue, glue_base+par_fill_skip_code);@/ @!@:par_fill_skip_}{\.{\\parfillskip} primitive@> primitive(@[@<|"thinmuskip"|@>@], assign_mu_glue, glue_base+thin_mu_skip_code);@/ @!@:thin_mu_skip_}{\.{\\thinmuskip} primitive@> primitive(@[@<|"medmuskip"|@>@], assign_mu_glue, glue_base+med_mu_skip_code);@/ @!@:med_mu_skip_}{\.{\\medmuskip} primitive@> primitive(@[@<|"thickmuskip"|@>@], assign_mu_glue, glue_base+thick_mu_skip_code);@/ @!@:thick_mu_skip_}{\.{\\thickmuskip} primitive@> @ @= case assign_glue: case assign_mu_glue: if (chr_code < skip_base) print_skip_param(chr_code-glue_base); else if (chr_code < mu_skip_base) {@+print_esc(@[@<|"skip"|@>@]);print_int(chr_code-skip_base); } else{@+print_esc(@[@<|"muskip"|@>@]);print_int(chr_code-mu_skip_base); } @+break; @ All glue parameters and registers are initially `\.{0pt plus0pt minus0pt}'. @= equiv(glue_base)=zero_glue;eq_level(glue_base)=level_one; eq_type(glue_base)=glue_ref; for (k=glue_base+1; k<=local_base-1; k++) eqtb[k]=eqtb[glue_base]; glue_ref_count(zero_glue)=glue_ref_count(zero_glue)+local_base-glue_base; @ @= if (n < skip_base) {@+print_skip_param(n-glue_base);print_char('='); if (n < glue_base+thin_mu_skip_code) print_spec(equiv(n),@[@<|"pt"|@>@]); else print_spec(equiv(n),@[@<|"mu"|@>@]); } else if (n < mu_skip_base) {@+print_esc(@[@<|"skip"|@>@]);print_int(n-skip_base);print_char('='); print_spec(equiv(n),@[@<|"pt"|@>@]); } else{@+print_esc(@[@<|"muskip"|@>@]);print_int(n-mu_skip_base);print_char('='); print_spec(equiv(n),@[@<|"mu"|@>@]); } @ Region 4 of |eqtb| contains the local quantities defined here. The bulk of this region is taken up by five tables that are indexed by eight-bit characters; these tables are important to both the syntactic and semantic portions of \TeX. There are also a bunch of special things like font and token parameters, as well as the tables of \.{\\toks} and \.{\\box} registers. @d par_shape_loc local_base /*specifies paragraph shape*/ @d output_routine_loc (local_base+1) /*points to token list for \.{\\output}*/ @d every_par_loc (local_base+2) /*points to token list for \.{\\everypar}*/ @d every_math_loc (local_base+3) /*points to token list for \.{\\everymath}*/ @d every_display_loc (local_base+4) /*points to token list for \.{\\everydisplay}*/ @d every_hbox_loc (local_base+5) /*points to token list for \.{\\everyhbox}*/ @d every_vbox_loc (local_base+6) /*points to token list for \.{\\everyvbox}*/ @d every_job_loc (local_base+7) /*points to token list for \.{\\everyjob}*/ @d every_cr_loc (local_base+8) /*points to token list for \.{\\everycr}*/ @d err_help_loc (local_base+9) /*points to token list for \.{\\errhelp}*/ @d toks_base (local_base+10) /*table of 256 token list registers*/ @d box_base (toks_base+256) /*table of 256 box registers*/ @d cur_font_loc (box_base+256) /*internal font number outside math mode*/ @d math_font_base (cur_font_loc+1) /*table of 48 math font numbers*/ @d cat_code_base (math_font_base+48) /*table of 256 command codes (the ``catcodes'')*/ @d lc_code_base (cat_code_base+256) /*table of 256 lowercase mappings*/ @d uc_code_base (lc_code_base+256) /*table of 256 uppercase mappings*/ @d sf_code_base (uc_code_base+256) /*table of 256 spacefactor mappings*/ @d math_code_base (sf_code_base+256) /*table of 256 math mode mappings*/ @d int_base (math_code_base+256) /*beginning of region 5*/ @# @d par_shape_ptr equiv(par_shape_loc) @d output_routine equiv(output_routine_loc) @d every_par equiv(every_par_loc) @d every_math equiv(every_math_loc) @d every_display equiv(every_display_loc) @d every_hbox equiv(every_hbox_loc) @d every_vbox equiv(every_vbox_loc) @d every_job equiv(every_job_loc) @d every_cr equiv(every_cr_loc) @d err_help equiv(err_help_loc) @d toks(X) equiv(toks_base+X) @d box(X) equiv(box_base+X) @d cur_font equiv(cur_font_loc) @d fam_fnt(X) equiv(math_font_base+X) @d cat_code(X) equiv(cat_code_base+X) @d lc_code(X) equiv(lc_code_base+X) @d uc_code(X) equiv(uc_code_base+X) @d sf_code(X) equiv(sf_code_base+X) @d math_code(X) equiv(math_code_base+X) /*Note: |math_code(c)| is the true math code plus |min_halfword|*/ @= primitive(@[@<|"output"|@>@], assign_toks, output_routine_loc); @!@:output_}{\.{\\output} primitive@> primitive(@[@<|"everypar"|@>@], assign_toks, every_par_loc); @!@:every_par_}{\.{\\everypar} primitive@> primitive(@[@<|"everymath"|@>@], assign_toks, every_math_loc); @!@:every_math_}{\.{\\everymath} primitive@> primitive(@[@<|"everydisplay"|@>@], assign_toks, every_display_loc); @!@:every_display_}{\.{\\everydisplay} primitive@> primitive(@[@<|"everyhbox"|@>@], assign_toks, every_hbox_loc); @!@:every_hbox_}{\.{\\everyhbox} primitive@> primitive(@[@<|"everyvbox"|@>@], assign_toks, every_vbox_loc); @!@:every_vbox_}{\.{\\everyvbox} primitive@> primitive(@[@<|"everyjob"|@>@], assign_toks, every_job_loc); @!@:every_job_}{\.{\\everyjob} primitive@> primitive(@[@<|"everycr"|@>@], assign_toks, every_cr_loc); @!@:every_cr_}{\.{\\everycr} primitive@> primitive(@[@<|"errhelp"|@>@], assign_toks, err_help_loc); @!@:err_help_}{\.{\\errhelp} primitive@> @ @= case assign_toks: if (chr_code >= toks_base) {@+print_esc(@[@<|"toks"|@>@]);print_int(chr_code-toks_base); } else switch (chr_code) { case output_routine_loc: print_esc(@[@<|"output"|@>@]);@+break; case every_par_loc: print_esc(@[@<|"everypar"|@>@]);@+break; case every_math_loc: print_esc(@[@<|"everymath"|@>@]);@+break; case every_display_loc: print_esc(@[@<|"everydisplay"|@>@]);@+break; case every_hbox_loc: print_esc(@[@<|"everyhbox"|@>@]);@+break; case every_vbox_loc: print_esc(@[@<|"everyvbox"|@>@]);@+break; case every_job_loc: print_esc(@[@<|"everyjob"|@>@]);@+break; case every_cr_loc: print_esc(@[@<|"everycr"|@>@]);@+break; default:print_esc(@[@<|"errhelp"|@>@]); } @+break; @ We initialize most things to null or undefined values. An undefined font is represented by the internal code |font_base|. However, the character code tables are given initial values based on the conventional interpretation of ASCII code. These initial values should not be changed when \TeX\ is adapted for use with non-English languages; all changes to the initialization conventions should be made in format packages, not in \TeX\ itself, so that global interchange of formats is possible. @d null_font font_base @d var_code 070000 /*math code meaning ``use the current family''*/ @= par_shape_ptr=null;eq_type(par_shape_loc)=shape_ref; eq_level(par_shape_loc)=level_one;@/ for (k=output_routine_loc; k<=toks_base+255; k++) eqtb[k]=eqtb[undefined_control_sequence]; box(0)=null;eq_type(box_base)=box_ref;eq_level(box_base)=level_one; for (k=box_base+1; k<=box_base+255; k++) eqtb[k]=eqtb[box_base]; cur_font=null_font;eq_type(cur_font_loc)=data; eq_level(cur_font_loc)=level_one;@/ for (k=math_font_base; k<=math_font_base+47; k++) eqtb[k]=eqtb[cur_font_loc]; equiv(cat_code_base)=0;eq_type(cat_code_base)=data; eq_level(cat_code_base)=level_one;@/ for (k=cat_code_base+1; k<=int_base-1; k++) eqtb[k]=eqtb[cat_code_base]; for (k=0; k<=255; k++) {@+cat_code(k)=other_char;math_code(k)=hi(k);sf_code(k)=1000; } cat_code(carriage_return)=car_ret;cat_code(' ')=spacer; cat_code('\\')=escape;cat_code('%')=comment; cat_code(invalid_code)=invalid_char;cat_code(null_code)=ignore; for (k='0'; k<='9'; k++) math_code(k)=hi(k+var_code); for (k='A'; k<='Z'; k++) {@+cat_code(k)=letter;cat_code(k+'a'-'A')=letter;@/ math_code(k)=hi(k+var_code+0x100); math_code(k+'a'-'A')=hi(k+'a'-'A'+var_code+0x100);@/ lc_code(k)=k+'a'-'A';lc_code(k+'a'-'A')=k+'a'-'A';@/ uc_code(k)=k;uc_code(k+'a'-'A')=k;@/ sf_code(k)=999; } @ @= if (n==par_shape_loc) {@+print_esc(@[@<|"parshape"|@>@]);print_char('='); if (par_shape_ptr==null) print_char('0'); else print_int(info(par_shape_ptr)); } else if (n < toks_base) {@+print_cmd_chr(assign_toks, n);print_char('='); if (equiv(n)!=null) show_token_list(link(equiv(n)), null, 32); } else if (n < box_base) {@+print_esc(@[@<|"toks"|@>@]);print_int(n-toks_base);print_char('='); if (equiv(n)!=null) show_token_list(link(equiv(n)), null, 32); } else if (n < cur_font_loc) {@+print_esc(@[@<|"box"|@>@]);print_int(n-box_base);print_char('='); if (equiv(n)==null) print_str("void"); else{@+depth_threshold=0;breadth_max=1;show_node_list(equiv(n)); } } else if (n < cat_code_base) @@; else@@; @ @= {@+if (n==cur_font_loc) print_str("current font"); else if (n < math_font_base+16) {@+print_esc(@[@<|"textfont"|@>@]);print_int(n-math_font_base); } else if (n < math_font_base+32) {@+print_esc(@[@<|"scriptfont"|@>@]);print_int(n-math_font_base-16); } else{@+print_esc(@[@<|"scriptscriptfont"|@>@]);print_int(n-math_font_base-32); } print_char('=');@/ print_esc(hash[font_id_base+equiv(n)].rh); /*that's |font_id_text(equiv(n))|*/ } @ @= if (n < math_code_base) {@+if (n < lc_code_base) {@+print_esc(@[@<|"catcode"|@>@]);print_int(n-cat_code_base); } else if (n < uc_code_base) {@+print_esc(@[@<|"lccode"|@>@]);print_int(n-lc_code_base); } else if (n < sf_code_base) {@+print_esc(@[@<|"uccode"|@>@]);print_int(n-uc_code_base); } else{@+print_esc(@[@<|"sfcode"|@>@]);print_int(n-sf_code_base); } print_char('=');print_int(equiv(n)); } else{@+print_esc(@[@<|"mathcode"|@>@]);print_int(n-math_code_base); print_char('=');print_int(ho(equiv(n))); } @ Region 5 of |eqtb| contains the integer parameters and registers defined here, as well as the |del_code| table. The latter table differs from the |cat_code dotdot math_code| tables that precede it, since delimiter codes are fullword integers while the other kinds of codes occupy at most a halfword. This is what makes region~5 different from region~4. We will store the |eq_level| information in an auxiliary array of quarterwords that will be defined later. @d pretolerance_code 0 /*badness tolerance before hyphenation*/ @d tolerance_code 1 /*badness tolerance after hyphenation*/ @d line_penalty_code 2 /*added to the badness of every line*/ @d hyphen_penalty_code 3 /*penalty for break after discretionary hyphen*/ @d ex_hyphen_penalty_code 4 /*penalty for break after explicit hyphen*/ @d club_penalty_code 5 /*penalty for creating a club line*/ @d widow_penalty_code 6 /*penalty for creating a widow line*/ @d display_widow_penalty_code 7 /*ditto, just before a display*/ @d broken_penalty_code 8 /*penalty for breaking a page at a broken line*/ @d bin_op_penalty_code 9 /*penalty for breaking after a binary operation*/ @d rel_penalty_code 10 /*penalty for breaking after a relation*/ @d pre_display_penalty_code 11 /*penalty for breaking just before a displayed formula*/ @d post_display_penalty_code 12 /*penalty for breaking just after a displayed formula*/ @d inter_line_penalty_code 13 /*additional penalty between lines*/ @d double_hyphen_demerits_code 14 /*demerits for double hyphen break*/ @d final_hyphen_demerits_code 15 /*demerits for final hyphen break*/ @d adj_demerits_code 16 /*demerits for adjacent incompatible lines*/ @d mag_code 17 /*magnification ratio*/ @d delimiter_factor_code 18 /*ratio for variable-size delimiters*/ @d looseness_code 19 /*change in number of lines for a paragraph*/ @d time_code 20 /*current time of day*/ @d day_code 21 /*current day of the month*/ @d month_code 22 /*current month of the year*/ @d year_code 23 /*current year of our Lord*/ @d show_box_breadth_code 24 /*nodes per level in |show_box|*/ @d show_box_depth_code 25 /*maximum level in |show_box|*/ @d hbadness_code 26 /*hboxes exceeding this badness will be shown by |hpack|*/ @d vbadness_code 27 /*vboxes exceeding this badness will be shown by |vpack|*/ @d pausing_code 28 /*pause after each line is read from a file*/ @d tracing_online_code 29 /*show diagnostic output on terminal*/ @d tracing_macros_code 30 /*show macros as they are being expanded*/ @d tracing_stats_code 31 /*show memory usage if \TeX\ knows it*/ @d tracing_paragraphs_code 32 /*show line-break calculations*/ @d tracing_pages_code 33 /*show page-break calculations*/ @d tracing_output_code 34 /*show boxes when they are shipped out*/ @d tracing_lost_chars_code 35 /*show characters that aren't in the font*/ @d tracing_commands_code 36 /*show command codes at |big_switch|*/ @d tracing_restores_code 37 /*show equivalents when they are restored*/ @d uc_hyph_code 38 /*hyphenate words beginning with a capital letter*/ @d output_penalty_code 39 /*penalty found at current page break*/ @d max_dead_cycles_code 40 /*bound on consecutive dead cycles of output*/ @d hang_after_code 41 /*hanging indentation changes after this many lines*/ @d floating_penalty_code 42 /*penalty for insertions heldover after a split*/ @d global_defs_code 43 /*override \.{\\global} specifications*/ @d cur_fam_code 44 /*current family*/ @d escape_char_code 45 /*escape character for token output*/ @d default_hyphen_char_code 46 /*value of \.{\\hyphenchar} when a font is loaded*/ @d default_skew_char_code 47 /*value of \.{\\skewchar} when a font is loaded*/ @d end_line_char_code 48 /*character placed at the right end of the buffer*/ @d new_line_char_code 49 /*character that prints as |print_ln|*/ @d language_code 50 /*current hyphenation table*/ @d left_hyphen_min_code 51 /*minimum left hyphenation fragment size*/ @d right_hyphen_min_code 52 /*minimum right hyphenation fragment size*/ @d holding_inserts_code 53 /*do not remove insertion nodes from \.{\\box255}*/ @d error_context_lines_code 54 /*maximum intermediate line pairs shown*/ @d int_pars 55 /*total number of integer parameters*/ @d count_base (int_base+int_pars) /*256 user \.{\\count} registers*/ @d del_code_base (count_base+256) /*256 delimiter code mappings*/ @d dimen_base (del_code_base+256) /*beginning of region 6*/ @# @d del_code(X) eqtb[del_code_base+X].i @d count(X) eqtb[count_base+X].i @d int_par(X) eqtb[int_base+X].i /*an integer parameter*/ @d pretolerance int_par(pretolerance_code) @d tolerance int_par(tolerance_code) @d line_penalty int_par(line_penalty_code) @d hyphen_penalty int_par(hyphen_penalty_code) @d ex_hyphen_penalty int_par(ex_hyphen_penalty_code) @d club_penalty int_par(club_penalty_code) @d widow_penalty int_par(widow_penalty_code) @d display_widow_penalty int_par(display_widow_penalty_code) @d broken_penalty int_par(broken_penalty_code) @d bin_op_penalty int_par(bin_op_penalty_code) @d rel_penalty int_par(rel_penalty_code) @d pre_display_penalty int_par(pre_display_penalty_code) @d post_display_penalty int_par(post_display_penalty_code) @d inter_line_penalty int_par(inter_line_penalty_code) @d double_hyphen_demerits int_par(double_hyphen_demerits_code) @d final_hyphen_demerits int_par(final_hyphen_demerits_code) @d adj_demerits int_par(adj_demerits_code) @d mag int_par(mag_code) @d delimiter_factor int_par(delimiter_factor_code) @d looseness int_par(looseness_code) @d time int_par(time_code) @d day int_par(day_code) @d month int_par(month_code) @d year int_par(year_code) @d show_box_breadth int_par(show_box_breadth_code) @d show_box_depth int_par(show_box_depth_code) @d hbadness int_par(hbadness_code) @d vbadness int_par(vbadness_code) @d pausing int_par(pausing_code) @d tracing_online int_par(tracing_online_code) @d tracing_macros int_par(tracing_macros_code) @d tracing_stats int_par(tracing_stats_code) @d tracing_paragraphs int_par(tracing_paragraphs_code) @d tracing_pages int_par(tracing_pages_code) @d tracing_output int_par(tracing_output_code) @d tracing_lost_chars int_par(tracing_lost_chars_code) @d tracing_commands int_par(tracing_commands_code) @d tracing_restores int_par(tracing_restores_code) @d uc_hyph int_par(uc_hyph_code) @d output_penalty int_par(output_penalty_code) @d max_dead_cycles int_par(max_dead_cycles_code) @d hang_after int_par(hang_after_code) @d floating_penalty int_par(floating_penalty_code) @d global_defs int_par(global_defs_code) @d cur_fam int_par(cur_fam_code) @d escape_char int_par(escape_char_code) @d default_hyphen_char int_par(default_hyphen_char_code) @d default_skew_char int_par(default_skew_char_code) @d end_line_char int_par(end_line_char_code) @d new_line_char int_par(new_line_char_code) @d language int_par(language_code) @d left_hyphen_min int_par(left_hyphen_min_code) @d right_hyphen_min int_par(right_hyphen_min_code) @d holding_inserts int_par(holding_inserts_code) @d error_context_lines int_par(error_context_lines_code) @= depth_threshold=show_box_depth; breadth_max=show_box_breadth @ We can print the symbolic name of an integer parameter as follows. @p void print_param(int @!n) {@+switch (n) { case pretolerance_code: print_esc(@[@<|"pretolerance"|@>@]);@+break; case tolerance_code: print_esc(@[@<|"tolerance"|@>@]);@+break; case line_penalty_code: print_esc(@[@<|"linepenalty"|@>@]);@+break; case hyphen_penalty_code: print_esc(@[@<|"hyphenpenalty"|@>@]);@+break; case ex_hyphen_penalty_code: print_esc(@[@<|"exhyphenpenalty"|@>@]);@+break; case club_penalty_code: print_esc(@[@<|"clubpenalty"|@>@]);@+break; case widow_penalty_code: print_esc(@[@<|"widowpenalty"|@>@]);@+break; case display_widow_penalty_code: print_esc(@[@<|"displaywidowpenalty"|@>@]);@+break; case broken_penalty_code: print_esc(@[@<|"brokenpenalty"|@>@]);@+break; case bin_op_penalty_code: print_esc(@[@<|"binoppenalty"|@>@]);@+break; case rel_penalty_code: print_esc(@[@<|"relpenalty"|@>@]);@+break; case pre_display_penalty_code: print_esc(@[@<|"predisplaypenalty"|@>@]);@+break; case post_display_penalty_code: print_esc(@[@<|"postdisplaypenalty"|@>@]);@+break; case inter_line_penalty_code: print_esc(@[@<|"interlinepenalty"|@>@]);@+break; case double_hyphen_demerits_code: print_esc(@[@<|"doublehyphendemerits"|@>@]);@+break; case final_hyphen_demerits_code: print_esc(@[@<|"finalhyphendemerits"|@>@]);@+break; case adj_demerits_code: print_esc(@[@<|"adjdemerits"|@>@]);@+break; case mag_code: print_esc(@[@<|"mag"|@>@]);@+break; case delimiter_factor_code: print_esc(@[@<|"delimiterfactor"|@>@]);@+break; case looseness_code: print_esc(@[@<|"looseness"|@>@]);@+break; case time_code: print_esc(@[@<|"time"|@>@]);@+break; case day_code: print_esc(@[@<|"day"|@>@]);@+break; case month_code: print_esc(@[@<|"month"|@>@]);@+break; case year_code: print_esc(@[@<|"year"|@>@]);@+break; case show_box_breadth_code: print_esc(@[@<|"showboxbreadth"|@>@]);@+break; case show_box_depth_code: print_esc(@[@<|"showboxdepth"|@>@]);@+break; case hbadness_code: print_esc(@[@<|"hbadness"|@>@]);@+break; case vbadness_code: print_esc(@[@<|"vbadness"|@>@]);@+break; case pausing_code: print_esc(@[@<|"pausing"|@>@]);@+break; case tracing_online_code: print_esc(@[@<|"tracingonline"|@>@]);@+break; case tracing_macros_code: print_esc(@[@<|"tracingmacros"|@>@]);@+break; case tracing_stats_code: print_esc(@[@<|"tracingstats"|@>@]);@+break; case tracing_paragraphs_code: print_esc(@[@<|"tracingparagraphs"|@>@]);@+break; case tracing_pages_code: print_esc(@[@<|"tracingpages"|@>@]);@+break; case tracing_output_code: print_esc(@[@<|"tracingoutput"|@>@]);@+break; case tracing_lost_chars_code: print_esc(@[@<|"tracinglostchars"|@>@]);@+break; case tracing_commands_code: print_esc(@[@<|"tracingcommands"|@>@]);@+break; case tracing_restores_code: print_esc(@[@<|"tracingrestores"|@>@]);@+break; case uc_hyph_code: print_esc(@[@<|"uchyph"|@>@]);@+break; case output_penalty_code: print_esc(@[@<|"outputpenalty"|@>@]);@+break; case max_dead_cycles_code: print_esc(@[@<|"maxdeadcycles"|@>@]);@+break; case hang_after_code: print_esc(@[@<|"hangafter"|@>@]);@+break; case floating_penalty_code: print_esc(@[@<|"floatingpenalty"|@>@]);@+break; case global_defs_code: print_esc(@[@<|"globaldefs"|@>@]);@+break; case cur_fam_code: print_esc(@[@<|"fam"|@>@]);@+break; case escape_char_code: print_esc(@[@<|"escapechar"|@>@]);@+break; case default_hyphen_char_code: print_esc(@[@<|"defaulthyphenchar"|@>@]);@+break; case default_skew_char_code: print_esc(@[@<|"defaultskewchar"|@>@]);@+break; case end_line_char_code: print_esc(@[@<|"endlinechar"|@>@]);@+break; case new_line_char_code: print_esc(@[@<|"newlinechar"|@>@]);@+break; case language_code: print_esc(@[@<|"language"|@>@]);@+break; case left_hyphen_min_code: print_esc(@[@<|"lefthyphenmin"|@>@]);@+break; case right_hyphen_min_code: print_esc(@[@<|"righthyphenmin"|@>@]);@+break; case holding_inserts_code: print_esc(@[@<|"holdinginserts"|@>@]);@+break; case error_context_lines_code: print_esc(@[@<|"errorcontextlines"|@>@]);@+break; default:print_str("[unknown integer parameter!]"); } } @ The integer parameter names must be entered into the hash table. @= primitive(@[@<|"pretolerance"|@>@], assign_int, int_base+pretolerance_code);@/ @!@:pretolerance_}{\.{\\pretolerance} primitive@> primitive(@[@<|"tolerance"|@>@], assign_int, int_base+tolerance_code);@/ @!@:tolerance_}{\.{\\tolerance} primitive@> primitive(@[@<|"linepenalty"|@>@], assign_int, int_base+line_penalty_code);@/ @!@:line_penalty_}{\.{\\linepenalty} primitive@> primitive(@[@<|"hyphenpenalty"|@>@], assign_int, int_base+hyphen_penalty_code);@/ @!@:hyphen_penalty_}{\.{\\hyphenpenalty} primitive@> primitive(@[@<|"exhyphenpenalty"|@>@], assign_int, int_base+ex_hyphen_penalty_code);@/ @!@:ex_hyphen_penalty_}{\.{\\exhyphenpenalty} primitive@> primitive(@[@<|"clubpenalty"|@>@], assign_int, int_base+club_penalty_code);@/ @!@:club_penalty_}{\.{\\clubpenalty} primitive@> primitive(@[@<|"widowpenalty"|@>@], assign_int, int_base+widow_penalty_code);@/ @!@:widow_penalty_}{\.{\\widowpenalty} primitive@> primitive(@[@<|"displaywidowpenalty"|@>@], assign_int, int_base+display_widow_penalty_code);@/ @!@:display_widow_penalty_}{\.{\\displaywidowpenalty} primitive@> primitive(@[@<|"brokenpenalty"|@>@], assign_int, int_base+broken_penalty_code);@/ @!@:broken_penalty_}{\.{\\brokenpenalty} primitive@> primitive(@[@<|"binoppenalty"|@>@], assign_int, int_base+bin_op_penalty_code);@/ @!@:bin_op_penalty_}{\.{\\binoppenalty} primitive@> primitive(@[@<|"relpenalty"|@>@], assign_int, int_base+rel_penalty_code);@/ @!@:rel_penalty_}{\.{\\relpenalty} primitive@> primitive(@[@<|"predisplaypenalty"|@>@], assign_int, int_base+pre_display_penalty_code);@/ @!@:pre_display_penalty_}{\.{\\predisplaypenalty} primitive@> primitive(@[@<|"postdisplaypenalty"|@>@], assign_int, int_base+post_display_penalty_code);@/ @!@:post_display_penalty_}{\.{\\postdisplaypenalty} primitive@> primitive(@[@<|"interlinepenalty"|@>@], assign_int, int_base+inter_line_penalty_code);@/ @!@:inter_line_penalty_}{\.{\\interlinepenalty} primitive@> primitive(@[@<|"doublehyphendemerits"|@>@], assign_int, int_base+double_hyphen_demerits_code);@/ @!@:double_hyphen_demerits_}{\.{\\doublehyphendemerits} primitive@> primitive(@[@<|"finalhyphendemerits"|@>@], assign_int, int_base+final_hyphen_demerits_code);@/ @!@:final_hyphen_demerits_}{\.{\\finalhyphendemerits} primitive@> primitive(@[@<|"adjdemerits"|@>@], assign_int, int_base+adj_demerits_code);@/ @!@:adj_demerits_}{\.{\\adjdemerits} primitive@> primitive(@[@<|"mag"|@>@], assign_int, int_base+mag_code);@/ @!@:mag_}{\.{\\mag} primitive@> primitive(@[@<|"delimiterfactor"|@>@], assign_int, int_base+delimiter_factor_code);@/ @!@:delimiter_factor_}{\.{\\delimiterfactor} primitive@> primitive(@[@<|"looseness"|@>@], assign_int, int_base+looseness_code);@/ @!@:looseness_}{\.{\\looseness} primitive@> primitive(@[@<|"time"|@>@], assign_int, int_base+time_code);@/ @!@:time_}{\.{\\time} primitive@> primitive(@[@<|"day"|@>@], assign_int, int_base+day_code);@/ @!@:day_}{\.{\\day} primitive@> primitive(@[@<|"month"|@>@], assign_int, int_base+month_code);@/ @!@:month_}{\.{\\month} primitive@> primitive(@[@<|"year"|@>@], assign_int, int_base+year_code);@/ @!@:year_}{\.{\\year} primitive@> primitive(@[@<|"showboxbreadth"|@>@], assign_int, int_base+show_box_breadth_code);@/ @!@:show_box_breadth_}{\.{\\showboxbreadth} primitive@> primitive(@[@<|"showboxdepth"|@>@], assign_int, int_base+show_box_depth_code);@/ @!@:show_box_depth_}{\.{\\showboxdepth} primitive@> primitive(@[@<|"hbadness"|@>@], assign_int, int_base+hbadness_code);@/ @!@:hbadness_}{\.{\\hbadness} primitive@> primitive(@[@<|"vbadness"|@>@], assign_int, int_base+vbadness_code);@/ @!@:vbadness_}{\.{\\vbadness} primitive@> primitive(@[@<|"pausing"|@>@], assign_int, int_base+pausing_code);@/ @!@:pausing_}{\.{\\pausing} primitive@> primitive(@[@<|"tracingonline"|@>@], assign_int, int_base+tracing_online_code);@/ @!@:tracing_online_}{\.{\\tracingonline} primitive@> primitive(@[@<|"tracingmacros"|@>@], assign_int, int_base+tracing_macros_code);@/ @!@:tracing_macros_}{\.{\\tracingmacros} primitive@> primitive(@[@<|"tracingstats"|@>@], assign_int, int_base+tracing_stats_code);@/ @!@:tracing_stats_}{\.{\\tracingstats} primitive@> primitive(@[@<|"tracingparagraphs"|@>@], assign_int, int_base+tracing_paragraphs_code);@/ @!@:tracing_paragraphs_}{\.{\\tracingparagraphs} primitive@> primitive(@[@<|"tracingpages"|@>@], assign_int, int_base+tracing_pages_code);@/ @!@:tracing_pages_}{\.{\\tracingpages} primitive@> primitive(@[@<|"tracingoutput"|@>@], assign_int, int_base+tracing_output_code);@/ @!@:tracing_output_}{\.{\\tracingoutput} primitive@> primitive(@[@<|"tracinglostchars"|@>@], assign_int, int_base+tracing_lost_chars_code);@/ @!@:tracing_lost_chars_}{\.{\\tracinglostchars} primitive@> primitive(@[@<|"tracingcommands"|@>@], assign_int, int_base+tracing_commands_code);@/ @!@:tracing_commands_}{\.{\\tracingcommands} primitive@> primitive(@[@<|"tracingrestores"|@>@], assign_int, int_base+tracing_restores_code);@/ @!@:tracing_restores_}{\.{\\tracingrestores} primitive@> primitive(@[@<|"uchyph"|@>@], assign_int, int_base+uc_hyph_code);@/ @!@:uc_hyph_}{\.{\\uchyph} primitive@> primitive(@[@<|"outputpenalty"|@>@], assign_int, int_base+output_penalty_code);@/ @!@:output_penalty_}{\.{\\outputpenalty} primitive@> primitive(@[@<|"maxdeadcycles"|@>@], assign_int, int_base+max_dead_cycles_code);@/ @!@:max_dead_cycles_}{\.{\\maxdeadcycles} primitive@> primitive(@[@<|"hangafter"|@>@], assign_int, int_base+hang_after_code);@/ @!@:hang_after_}{\.{\\hangafter} primitive@> primitive(@[@<|"floatingpenalty"|@>@], assign_int, int_base+floating_penalty_code);@/ @!@:floating_penalty_}{\.{\\floatingpenalty} primitive@> primitive(@[@<|"globaldefs"|@>@], assign_int, int_base+global_defs_code);@/ @!@:global_defs_}{\.{\\globaldefs} primitive@> primitive(@[@<|"fam"|@>@], assign_int, int_base+cur_fam_code);@/ @!@:fam_}{\.{\\fam} primitive@> primitive(@[@<|"escapechar"|@>@], assign_int, int_base+escape_char_code);@/ @!@:escape_char_}{\.{\\escapechar} primitive@> primitive(@[@<|"defaulthyphenchar"|@>@], assign_int, int_base+default_hyphen_char_code);@/ @!@:default_hyphen_char_}{\.{\\defaulthyphenchar} primitive@> primitive(@[@<|"defaultskewchar"|@>@], assign_int, int_base+default_skew_char_code);@/ @!@:default_skew_char_}{\.{\\defaultskewchar} primitive@> primitive(@[@<|"endlinechar"|@>@], assign_int, int_base+end_line_char_code);@/ @!@:end_line_char_}{\.{\\endlinechar} primitive@> primitive(@[@<|"newlinechar"|@>@], assign_int, int_base+new_line_char_code);@/ @!@:new_line_char_}{\.{\\newlinechar} primitive@> primitive(@[@<|"language"|@>@], assign_int, int_base+language_code);@/ @!@:language_}{\.{\\language} primitive@> primitive(@[@<|"lefthyphenmin"|@>@], assign_int, int_base+left_hyphen_min_code);@/ @!@:left_hyphen_min_}{\.{\\lefthyphenmin} primitive@> primitive(@[@<|"righthyphenmin"|@>@], assign_int, int_base+right_hyphen_min_code);@/ @!@:right_hyphen_min_}{\.{\\righthyphenmin} primitive@> primitive(@[@<|"holdinginserts"|@>@], assign_int, int_base+holding_inserts_code);@/ @!@:holding_inserts_}{\.{\\holdinginserts} primitive@> primitive(@[@<|"errorcontextlines"|@>@], assign_int, int_base+error_context_lines_code);@/ @!@:error_context_lines_}{\.{\\errorcontextlines} primitive@> @ @= case assign_int: if (chr_code < count_base) print_param(chr_code-int_base); else{@+print_esc(@[@<|"count"|@>@]);print_int(chr_code-count_base); } @+break; @ The integer parameters should really be initialized by a macro package; the following initialization does the minimum to keep \TeX\ from complete failure. @^null delimiter@> @= for (k=int_base; k<=del_code_base-1; k++) eqtb[k].i=0; mag=1000;tolerance=10000;hang_after=1;max_dead_cycles=25; escape_char='\\';end_line_char=carriage_return; for (k=0; k<=255; k++) del_code(k)=-1; del_code('.')=0; /*this null delimiter is used in error recovery*/ @ The following procedure, which is called just before \TeX\ initializes its input and output, establishes the initial values of the date and time. @^system dependencies@> Since standard \PASCAL\ cannot provide such information, something special is needed. The program here simply specifies July 4, 1776, at noon; but users probably want a better approximation to the truth. @p void fix_date_and_time(void) {@+time=12*60; /*minutes since midnight*/ day=4; /*fourth day of the month*/ month=7; /*seventh month of the year*/ year=1776; /*Anno Domini*/ } @ @= {@+if (n < count_base) print_param(n-int_base); else if (n < del_code_base) {@+print_esc(@[@<|"count"|@>@]);print_int(n-count_base); } else{@+print_esc(@[@<|"delcode"|@>@]);print_int(n-del_code_base); } print_char('=');print_int(eqtb[n].i); } @ @=c=escape_char @ @=s==new_line_char @ \TeX\ is occasionally supposed to print diagnostic information that goes only into the transcript file, unless |tracing_online| is positive. Here are two routines that adjust the destination of print commands: @p void begin_diagnostic(void) /*prepare to do some tracing*/ {@+old_setting=selector; if ((tracing_online <= 0)&&(selector==term_and_log)) {@+decr(selector); if (history==spotless) history=warning_issued; } } @# void end_diagnostic(bool @!blank_line) /*restore proper conditions after tracing*/ {@+print_nl(""); if (blank_line) print_ln(); selector=old_setting; } @ Of course we had better declare another global variable, if the previous routines are going to work. @= uint8_t @!old_setting; @ The final region of |eqtb| contains the dimension parameters defined here, and the 256 \.{\\dimen} registers. @d par_indent_code 0 /*indentation of paragraphs*/ @d math_surround_code 1 /*space around math in text*/ @d line_skip_limit_code 2 /*threshold for |line_skip| instead of |baseline_skip|*/ @d hsize_code 3 /*line width in horizontal mode*/ @d vsize_code 4 /*page height in vertical mode*/ @d max_depth_code 5 /*maximum depth of boxes on main pages*/ @d split_max_depth_code 6 /*maximum depth of boxes on split pages*/ @d box_max_depth_code 7 /*maximum depth of explicit vboxes*/ @d hfuzz_code 8 /*tolerance for overfull hbox messages*/ @d vfuzz_code 9 /*tolerance for overfull vbox messages*/ @d delimiter_shortfall_code 10 /*maximum amount uncovered by variable delimiters*/ @d null_delimiter_space_code 11 /*blank space in null delimiters*/ @d script_space_code 12 /*extra space after subscript or superscript*/ @d pre_display_size_code 13 /*length of text preceding a display*/ @d display_width_code 14 /*length of line for displayed equation*/ @d display_indent_code 15 /*indentation of line for displayed equation*/ @d overfull_rule_code 16 /*width of rule that identifies overfull hboxes*/ @d hang_indent_code 17 /*amount of hanging indentation*/ @d h_offset_code 18 /*amount of horizontal offset when shipping pages out*/ @d v_offset_code 19 /*amount of vertical offset when shipping pages out*/ @d emergency_stretch_code 20 /*reduces badnesses on final pass of line-breaking*/ @d dimen_pars 21 /*total number of dimension parameters*/ @d scaled_base (dimen_base+dimen_pars) /*table of 256 user-defined \.{\\dimen} registers*/ @d eqtb_size (scaled_base+255) /*largest subscript of |eqtb|*/ @# @d dimen(X) eqtb[scaled_base+X].sc @d dimen_par(X) eqtb[dimen_base+X].sc /*a scaled quantity*/ @d par_indent dimen_par(par_indent_code) @d math_surround dimen_par(math_surround_code) @d line_skip_limit dimen_par(line_skip_limit_code) @d hsize dimen_par(hsize_code) @d vsize dimen_par(vsize_code) @d max_depth dimen_par(max_depth_code) @d split_max_depth dimen_par(split_max_depth_code) @d box_max_depth dimen_par(box_max_depth_code) @d hfuzz dimen_par(hfuzz_code) @d vfuzz dimen_par(vfuzz_code) @d delimiter_shortfall dimen_par(delimiter_shortfall_code) @d null_delimiter_space dimen_par(null_delimiter_space_code) @d script_space dimen_par(script_space_code) @d pre_display_size dimen_par(pre_display_size_code) @d display_width dimen_par(display_width_code) @d display_indent dimen_par(display_indent_code) @d overfull_rule dimen_par(overfull_rule_code) @d hang_indent dimen_par(hang_indent_code) @d h_offset dimen_par(h_offset_code) @d v_offset dimen_par(v_offset_code) @d emergency_stretch dimen_par(emergency_stretch_code) @p void print_length_param(int @!n) {@+switch (n) { case par_indent_code: print_esc(@[@<|"parindent"|@>@]);@+break; case math_surround_code: print_esc(@[@<|"mathsurround"|@>@]);@+break; case line_skip_limit_code: print_esc(@[@<|"lineskiplimit"|@>@]);@+break; case hsize_code: print_esc(@[@<|"hsize"|@>@]);@+break; case vsize_code: print_esc(@[@<|"vsize"|@>@]);@+break; case max_depth_code: print_esc(@[@<|"maxdepth"|@>@]);@+break; case split_max_depth_code: print_esc(@[@<|"splitmaxdepth"|@>@]);@+break; case box_max_depth_code: print_esc(@[@<|"boxmaxdepth"|@>@]);@+break; case hfuzz_code: print_esc(@[@<|"hfuzz"|@>@]);@+break; case vfuzz_code: print_esc(@[@<|"vfuzz"|@>@]);@+break; case delimiter_shortfall_code: print_esc(@[@<|"delimitershortfall"|@>@]);@+break; case null_delimiter_space_code: print_esc(@[@<|"nulldelimiterspace"|@>@]);@+break; case script_space_code: print_esc(@[@<|"scriptspace"|@>@]);@+break; case pre_display_size_code: print_esc(@[@<|"predisplaysize"|@>@]);@+break; case display_width_code: print_esc(@[@<|"displaywidth"|@>@]);@+break; case display_indent_code: print_esc(@[@<|"displayindent"|@>@]);@+break; case overfull_rule_code: print_esc(@[@<|"overfullrule"|@>@]);@+break; case hang_indent_code: print_esc(@[@<|"hangindent"|@>@]);@+break; case h_offset_code: print_esc(@[@<|"hoffset"|@>@]);@+break; case v_offset_code: print_esc(@[@<|"voffset"|@>@]);@+break; case emergency_stretch_code: print_esc(@[@<|"emergencystretch"|@>@]);@+break; default:print_str("[unknown dimen parameter!]"); } } @ @= primitive(@[@<|"parindent"|@>@], assign_dimen, dimen_base+par_indent_code);@/ @!@:par_indent_}{\.{\\parindent} primitive@> primitive(@[@<|"mathsurround"|@>@], assign_dimen, dimen_base+math_surround_code);@/ @!@:math_surround_}{\.{\\mathsurround} primitive@> primitive(@[@<|"lineskiplimit"|@>@], assign_dimen, dimen_base+line_skip_limit_code);@/ @!@:line_skip_limit_}{\.{\\lineskiplimit} primitive@> primitive(@[@<|"hsize"|@>@], assign_dimen, dimen_base+hsize_code);@/ @!@:hsize_}{\.{\\hsize} primitive@> primitive(@[@<|"vsize"|@>@], assign_dimen, dimen_base+vsize_code);@/ @!@:vsize_}{\.{\\vsize} primitive@> primitive(@[@<|"maxdepth"|@>@], assign_dimen, dimen_base+max_depth_code);@/ @!@:max_depth_}{\.{\\maxdepth} primitive@> primitive(@[@<|"splitmaxdepth"|@>@], assign_dimen, dimen_base+split_max_depth_code);@/ @!@:split_max_depth_}{\.{\\splitmaxdepth} primitive@> primitive(@[@<|"boxmaxdepth"|@>@], assign_dimen, dimen_base+box_max_depth_code);@/ @!@:box_max_depth_}{\.{\\boxmaxdepth} primitive@> primitive(@[@<|"hfuzz"|@>@], assign_dimen, dimen_base+hfuzz_code);@/ @!@:hfuzz_}{\.{\\hfuzz} primitive@> primitive(@[@<|"vfuzz"|@>@], assign_dimen, dimen_base+vfuzz_code);@/ @!@:vfuzz_}{\.{\\vfuzz} primitive@> primitive(@[@<|"delimitershortfall"|@>@], assign_dimen, dimen_base+delimiter_shortfall_code);@/ @!@:delimiter_shortfall_}{\.{\\delimitershortfall} primitive@> primitive(@[@<|"nulldelimiterspace"|@>@], assign_dimen, dimen_base+null_delimiter_space_code);@/ @!@:null_delimiter_space_}{\.{\\nulldelimiterspace} primitive@> primitive(@[@<|"scriptspace"|@>@], assign_dimen, dimen_base+script_space_code);@/ @!@:script_space_}{\.{\\scriptspace} primitive@> primitive(@[@<|"predisplaysize"|@>@], assign_dimen, dimen_base+pre_display_size_code);@/ @!@:pre_display_size_}{\.{\\predisplaysize} primitive@> primitive(@[@<|"displaywidth"|@>@], assign_dimen, dimen_base+display_width_code);@/ @!@:display_width_}{\.{\\displaywidth} primitive@> primitive(@[@<|"displayindent"|@>@], assign_dimen, dimen_base+display_indent_code);@/ @!@:display_indent_}{\.{\\displayindent} primitive@> primitive(@[@<|"overfullrule"|@>@], assign_dimen, dimen_base+overfull_rule_code);@/ @!@:overfull_rule_}{\.{\\overfullrule} primitive@> primitive(@[@<|"hangindent"|@>@], assign_dimen, dimen_base+hang_indent_code);@/ @!@:hang_indent_}{\.{\\hangindent} primitive@> primitive(@[@<|"hoffset"|@>@], assign_dimen, dimen_base+h_offset_code);@/ @!@:h_offset_}{\.{\\hoffset} primitive@> primitive(@[@<|"voffset"|@>@], assign_dimen, dimen_base+v_offset_code);@/ @!@:v_offset_}{\.{\\voffset} primitive@> primitive(@[@<|"emergencystretch"|@>@], assign_dimen, dimen_base+emergency_stretch_code);@/ @!@:emergency_stretch_}{\.{\\emergencystretch} primitive@> @ @= case assign_dimen: if (chr_code < scaled_base) print_length_param(chr_code-dimen_base); else{@+print_esc(@[@<|"dimen"|@>@]);print_int(chr_code-scaled_base); } @+break; @ @= for (k=dimen_base; k<=eqtb_size; k++) eqtb[k].sc=0; @ @= {@+if (n < scaled_base) print_length_param(n-dimen_base); else{@+print_esc(@[@<|"dimen"|@>@]);print_int(n-scaled_base); } print_char('=');print_scaled(eqtb[n].sc);print_str("pt"); } @ Here is a procedure that displays the contents of |eqtb[n]| symbolically. @p@t\4@>@@;@/ #ifdef @!STAT void show_eqtb(pointer @!n) {@+if (n < active_base) print_char('?'); /*this can't happen*/ else if (n < glue_base) @@; else if (n < local_base) @@; else if (n < int_base) @@; else if (n < dimen_base) @@; else if (n <= eqtb_size) @@; else print_char('?'); /*this can't happen either*/ } #endif @ The last two regions of |eqtb| have fullword values instead of the three fields |eq_level|, |eq_type|, and |equiv|. An |eq_type| is unnecessary, but \TeX\ needs to store the |eq_level| information in another array called |xeq_level|. @= memory_word @!eqtb0[eqtb_size-active_base+1], *const @!eqtb = @!eqtb0-active_base; quarterword @!xeq_level0[eqtb_size-int_base+1], *const @!xeq_level = @!xeq_level0-int_base; @ @= for (k=int_base; k<=eqtb_size; k++) xeq_level[k]=level_one; @ When the debugging routine |search_mem| is looking for pointers having a given value, it is interested only in regions 1 to~3 of~|eqtb|, and in the first part of region~4. @= for (q=active_base; q<=box_base+255; q++) {@+if (equiv(q)==p) {@+print_nl("EQUIV(");print_int(q);print_char(')'); } } @* The hash table. Control sequences are stored and retrieved by means of a fairly standard hash table algorithm called the method of ``coalescing lists'' (cf.\ Algorithm 6.4C in {\sl The Art of Computer Programming\/}). Once a control sequence enters the table, it is never removed, because there are complicated situations involving \.{\\gdef} where the removal of a control sequence at the end of a group would be a mistake preventable only by the introduction of a complicated reference-count mechanism. The actual sequence of letters forming a control sequence identifier is stored in the |str_pool| array together with all the other strings. An auxiliary array |hash| consists of items with two halfword fields per word. The first of these, called |next(p)|, points to the next identifier belonging to the same coalesced list as the identifier corresponding to~|p|; and the other, called |text(p)|, points to the |str_start| entry for |p|'s identifier. If position~|p| of the hash table is empty, we have |text(p)==0|; if position |p| is either empty or the end of a coalesced hash list, we have |next(p)==0|. An auxiliary pointer variable called |hash_used| is maintained in such a way that all locations |p >= hash_used| are nonempty. The global variable |cs_count| tells how many multiletter control sequences have been defined, if statistics are being kept. A global boolean variable called |no_new_control_sequence| is set to |true| during the time that new hash table entries are forbidden. @d next(X) hash[X].lh /*link for coalesced lists*/ @d text(X) hash[X].rh /*string number for control sequence name*/ @d hash_is_full (hash_used==hash_base) /*test if all positions are occupied*/ @d font_id_text(X) text(font_id_base+X) /*a frozen font identifier's name*/ @= two_halves @!hash0[undefined_control_sequence-hash_base], *const @!hash = @!hash0-hash_base; /*the hash table*/ pointer @!hash_used; /*allocation pointer for |hash|*/ bool @!no_new_control_sequence; /*are new identifiers legal?*/ int @!cs_count; /*total number of known identifiers*/ @ @= no_new_control_sequence=true; /*new identifiers are usually forbidden*/ next(hash_base)=0;text(hash_base)=0; for (k=hash_base+1; k<=undefined_control_sequence-1; k++) hash[k]=hash[hash_base]; @ @= hash_used=frozen_control_sequence; /*nothing is used*/ cs_count=0; eq_type(frozen_dont_expand)=dont_expand; text(frozen_dont_expand)=@[@<|"notexpanded:"|@>@]; @.notexpanded:@> @ Here is the subroutine that searches the hash table for an identifier that matches a given string of length |l > 1| appearing in |buffer[j dotdot (j+l-1)]|. If the identifier is found, the corresponding hash table address is returned. Otherwise, if the global variable |no_new_control_sequence| is |true|, the dummy address |undefined_control_sequence| is returned. Otherwise the identifier is inserted into the hash table and its location is returned. @p pointer id_lookup(int @!j, int @!l) /*search the hash table*/ {@+ /*go here if you found it*/ int h; /*hash code*/ int @!d; /*number of characters in incomplete current string*/ pointer @!p; /*index in |hash| array*/ pointer @!k; /*index in |buffer| array*/ @; p=h+hash_base; /*we start searching here; note that |0 <= h < hash_prime|*/ loop@+{@+if (text(p) > 0) if (length(text(p))==l) if (str_eq_buf(text(p), j)) goto found; if (next(p)==0) {@+if (no_new_control_sequence) p=undefined_control_sequence; else@; goto found; } p=next(p); } found: return p; } @ @= {@+if (text(p) > 0) {@+@/do@+{if (hash_is_full) overflow("hash size", hash_size); @:TeX capacity exceeded hash size}{\quad hash size@> decr(hash_used); }@+ while (!(text(hash_used)==0)); /*search for an empty location in |hash|*/ next(p)=hash_used;p=hash_used; } str_room(l);d=cur_length; while (pool_ptr > str_start[str_ptr]) {@+decr(pool_ptr);str_pool[pool_ptr+l]=str_pool[pool_ptr]; } /*move current string up to make room for another*/ for (k=j; k<=j+l-1; k++) append_char(buffer[k]); text(p)=make_string();pool_ptr=pool_ptr+d; #ifdef @!STAT incr(cs_count); #endif @;@/ } @ The value of |hash_prime| should be roughly 85\pct! of |hash_size|, and it should be a prime number. The theory of hashing tells us to expect fewer than two table probes, on the average, when the search is successful. [See J.~S. Vitter, {\sl Journal of the ACM\/ \bf30} (1983), 231--258.] @^Vitter, Jeffrey Scott@> @= h=buffer[j]; for (k=j+1; k<=j+l-1; k++) {@+h=h+h+buffer[k]; while (h >= hash_prime) h=h-hash_prime; } @ Single-character control sequences do not need to be looked up in a hash table, since we can use the character code itself as a direct address. The procedure |print_cs| prints the name of a control sequence, given a pointer to its address in |eqtb|. A space is printed after the name unless it is a single nonletter or an active character. This procedure might be invoked with invalid data, so it is ``extra robust.'' The individual characters must be printed one at a time using |print|, since they may be unprintable. @= void print_cs(int @!p) /*prints a purported control sequence*/ {@+if (p < hash_base) /*single character*/ if (p >= single_base) if (p==null_cs) {@+print_esc(@[@<|"csname"|@>@]);print_esc(@[@<|"endcsname"|@>@]);print_char(' '); } else{@+print_esc(p-single_base); if (cat_code(p-single_base)==letter) print_char(' '); } else if (p < active_base) print_esc(@[@<|"IMPOSSIBLE."|@>@]); @.IMPOSSIBLE@> else print(p-active_base); else if (p >= undefined_control_sequence) print_esc(@[@<|"IMPOSSIBLE."|@>@]); else if ((text(p) < 0)||(text(p) >= str_ptr)) print_esc(@[@<|"NONEXISTENT."|@>@]); @.NONEXISTENT@> else{@+print_esc(text(p)); print_char(' '); } } @ Here is a similar procedure; it avoids the error checks, and it never prints a space after the control sequence. @= void sprint_cs(pointer @!p) /*prints a control sequence*/ {@+if (p < hash_base) if (p < single_base) print(p-active_base); else if (p < null_cs) print_esc(p-single_base); else{@+print_esc(@[@<|"csname"|@>@]);print_esc(@[@<|"endcsname"|@>@]); } else print_esc(text(p)); } @ We need to put \TeX's ``primitive'' control sequences into the hash table, together with their command code (which will be the |eq_type|) and an operand (which will be the |equiv|). The |primitive| procedure does this, in a way that no \TeX\ user can. The global value |cur_val| contains the new |eqtb| pointer after |primitive| has acted. @p #ifdef @!INIT void primitive(str_number @!s, quarterword @!c, halfword @!o) {@+pool_pointer k; /*index into |str_pool|*/ int @!j; /*index into |buffer|*/ small_number @!l; /*length of the string*/ if (s < 256) cur_val=s+single_base; else{@+k=str_start[s];l=str_start[s+1]-k; /*we will move |s| into the (empty) |buffer|*/ for (j=0; j<=l-1; j++) buffer[j]=so(str_pool[k+j]); cur_val=id_lookup(0, l); /*|no_new_control_sequence| is |false|*/ flush_string;text(cur_val)=s; /*we don't want to have the string twice*/ } eq_level(cur_val)=level_one;eq_type(cur_val)=c;equiv(cur_val)=o; } #endif @ Many of \TeX's primitives need no |equiv|, since they are identifiable by their |eq_type| alone. These primitives are loaded into the hash table as follows: @= primitive(' ', ex_space, 0);@/ @!@:Single-character primitives /}{\quad\.{\\\ }@> primitive('/', ital_corr, 0);@/ @!@:Single-character primitives /}{\quad\.{\\/}@> primitive(@[@<|"accent"|@>@], accent, 0);@/ @!@:accent_}{\.{\\accent} primitive@> primitive(@[@<|"advance"|@>@], advance, 0);@/ @!@:advance_}{\.{\\advance} primitive@> primitive(@[@<|"afterassignment"|@>@], after_assignment, 0);@/ @!@:after_assignment_}{\.{\\afterassignment} primitive@> primitive(@[@<|"aftergroup"|@>@], after_group, 0);@/ @!@:after_group_}{\.{\\aftergroup} primitive@> primitive(@[@<|"begingroup"|@>@], begin_group, 0);@/ @!@:begin_group_}{\.{\\begingroup} primitive@> primitive(@[@<|"char"|@>@], char_num, 0);@/ @!@:char_}{\.{\\char} primitive@> primitive(@[@<|"csname"|@>@], cs_name, 0);@/ @!@:cs_name_}{\.{\\csname} primitive@> primitive(@[@<|"delimiter"|@>@], delim_num, 0);@/ @!@:delimiter_}{\.{\\delimiter} primitive@> primitive(@[@<|"divide"|@>@], divide, 0);@/ @!@:divide_}{\.{\\divide} primitive@> primitive(@[@<|"endcsname"|@>@], end_cs_name, 0);@/ @!@:end_cs_name_}{\.{\\endcsname} primitive@> primitive(@[@<|"endgroup"|@>@], end_group, 0); @!@:end_group_}{\.{\\endgroup} primitive@> text(frozen_end_group)=@[@<|"endgroup"|@>@];eqtb[frozen_end_group]=eqtb[cur_val];@/ primitive(@[@<|"expandafter"|@>@], expand_after, 0);@/ @!@:expand_after_}{\.{\\expandafter} primitive@> primitive(@[@<|"font"|@>@], def_font, 0);@/ @!@:font_}{\.{\\font} primitive@> primitive(@[@<|"fontdimen"|@>@], assign_font_dimen, 0);@/ @!@:font_dimen_}{\.{\\fontdimen} primitive@> primitive(@[@<|"halign"|@>@], halign, 0);@/ @!@:halign_}{\.{\\halign} primitive@> primitive(@[@<|"hrule"|@>@], hrule, 0);@/ @!@:hrule_}{\.{\\hrule} primitive@> primitive(@[@<|"ignorespaces"|@>@], ignore_spaces, 0);@/ @!@:ignore_spaces_}{\.{\\ignorespaces} primitive@> primitive(@[@<|"insert"|@>@], insert, 0);@/ @!@:insert_}{\.{\\insert} primitive@> primitive(@[@<|"mark"|@>@], mark, 0);@/ @!@:mark_}{\.{\\mark} primitive@> primitive(@[@<|"mathaccent"|@>@], math_accent, 0);@/ @!@:math_accent_}{\.{\\mathaccent} primitive@> primitive(@[@<|"mathchar"|@>@], math_char_num, 0);@/ @!@:math_char_}{\.{\\mathchar} primitive@> primitive(@[@<|"mathchoice"|@>@], math_choice, 0);@/ @!@:math_choice_}{\.{\\mathchoice} primitive@> primitive(@[@<|"multiply"|@>@], multiply, 0);@/ @!@:multiply_}{\.{\\multiply} primitive@> primitive(@[@<|"noalign"|@>@], no_align, 0);@/ @!@:no_align_}{\.{\\noalign} primitive@> primitive(@[@<|"noboundary"|@>@], no_boundary, 0);@/ @!@:no_boundary_}{\.{\\noboundary} primitive@> primitive(@[@<|"noexpand"|@>@], no_expand, 0);@/ @!@:no_expand_}{\.{\\noexpand} primitive@> primitive(@[@<|"nonscript"|@>@], non_script, 0);@/ @!@:non_script_}{\.{\\nonscript} primitive@> primitive(@[@<|"omit"|@>@], omit, 0);@/ @!@:omit_}{\.{\\omit} primitive@> primitive(@[@<|"parshape"|@>@], set_shape, 0);@/ @!@:par_shape_}{\.{\\parshape} primitive@> primitive(@[@<|"penalty"|@>@], break_penalty, 0);@/ @!@:penalty_}{\.{\\penalty} primitive@> primitive(@[@<|"prevgraf"|@>@], set_prev_graf, 0);@/ @!@:prev_graf_}{\.{\\prevgraf} primitive@> primitive(@[@<|"radical"|@>@], radical, 0);@/ @!@:radical_}{\.{\\radical} primitive@> primitive(@[@<|"read"|@>@], read_to_cs, 0);@/ @!@:read_}{\.{\\read} primitive@> primitive(@[@<|"relax"|@>@], relax, 256); /*cf.\ |scan_file_name|*/ @!@:relax_}{\.{\\relax} primitive@> text(frozen_relax)=@[@<|"relax"|@>@];eqtb[frozen_relax]=eqtb[cur_val];@/ primitive(@[@<|"setbox"|@>@], set_box, 0);@/ @!@:set_box_}{\.{\\setbox} primitive@> primitive(@[@<|"the"|@>@], the, 0);@/ @!@:the_}{\.{\\the} primitive@> primitive(@[@<|"toks"|@>@], toks_register, 0);@/ @!@:toks_}{\.{\\toks} primitive@> primitive(@[@<|"vadjust"|@>@], vadjust, 0);@/ @!@:vadjust_}{\.{\\vadjust} primitive@> primitive(@[@<|"valign"|@>@], valign, 0);@/ @!@:valign_}{\.{\\valign} primitive@> primitive(@[@<|"vcenter"|@>@], vcenter, 0);@/ @!@:vcenter_}{\.{\\vcenter} primitive@> primitive(@[@<|"vrule"|@>@], vrule, 0);@/ @!@:vrule_}{\.{\\vrule} primitive@> @ Each primitive has a corresponding inverse, so that it is possible to display the cryptic numeric contents of |eqtb| in symbolic form. Every call of |primitive| in this program is therefore accompanied by some straightforward code that forms part of the |print_cmd_chr| routine below. @= case accent: print_esc(@[@<|"accent"|@>@]);@+break; case advance: print_esc(@[@<|"advance"|@>@]);@+break; case after_assignment: print_esc(@[@<|"afterassignment"|@>@]);@+break; case after_group: print_esc(@[@<|"aftergroup"|@>@]);@+break; case assign_font_dimen: print_esc(@[@<|"fontdimen"|@>@]);@+break; case begin_group: print_esc(@[@<|"begingroup"|@>@]);@+break; case break_penalty: print_esc(@[@<|"penalty"|@>@]);@+break; case char_num: print_esc(@[@<|"char"|@>@]);@+break; case cs_name: print_esc(@[@<|"csname"|@>@]);@+break; case def_font: print_esc(@[@<|"font"|@>@]);@+break; case delim_num: print_esc(@[@<|"delimiter"|@>@]);@+break; case divide: print_esc(@[@<|"divide"|@>@]);@+break; case end_cs_name: print_esc(@[@<|"endcsname"|@>@]);@+break; case end_group: print_esc(@[@<|"endgroup"|@>@]);@+break; case ex_space: print_esc(' ');@+break; case expand_after: print_esc(@[@<|"expandafter"|@>@]);@+break; case halign: print_esc(@[@<|"halign"|@>@]);@+break; case hrule: print_esc(@[@<|"hrule"|@>@]);@+break; case ignore_spaces: print_esc(@[@<|"ignorespaces"|@>@]);@+break; case insert: print_esc(@[@<|"insert"|@>@]);@+break; case ital_corr: print_esc('/');@+break; case mark: print_esc(@[@<|"mark"|@>@]);@+break; case math_accent: print_esc(@[@<|"mathaccent"|@>@]);@+break; case math_char_num: print_esc(@[@<|"mathchar"|@>@]);@+break; case math_choice: print_esc(@[@<|"mathchoice"|@>@]);@+break; case multiply: print_esc(@[@<|"multiply"|@>@]);@+break; case no_align: print_esc(@[@<|"noalign"|@>@]);@+break; case no_boundary: print_esc(@[@<|"noboundary"|@>@]);@+break; case no_expand: print_esc(@[@<|"noexpand"|@>@]);@+break; case non_script: print_esc(@[@<|"nonscript"|@>@]);@+break; case omit: print_esc(@[@<|"omit"|@>@]);@+break; case radical: print_esc(@[@<|"radical"|@>@]);@+break; case read_to_cs: print_esc(@[@<|"read"|@>@]);@+break; case relax: print_esc(@[@<|"relax"|@>@]);@+break; case set_box: print_esc(@[@<|"setbox"|@>@]);@+break; case set_prev_graf: print_esc(@[@<|"prevgraf"|@>@]);@+break; case set_shape: print_esc(@[@<|"parshape"|@>@]);@+break; case the: print_esc(@[@<|"the"|@>@]);@+break; case toks_register: print_esc(@[@<|"toks"|@>@]);@+break; case vadjust: print_esc(@[@<|"vadjust"|@>@]);@+break; case valign: print_esc(@[@<|"valign"|@>@]);@+break; case vcenter: print_esc(@[@<|"vcenter"|@>@]);@+break; case vrule: print_esc(@[@<|"vrule"|@>@]);@+break; @ We will deal with the other primitives later, at some point in the program where their |eq_type| and |equiv| values are more meaningful. For example, the primitives for math mode will be loaded when we consider the routines that deal with formulas. It is easy to find where each particular primitive was treated by looking in the index at the end; for example, the section where |@[@<|"radical"|@>@]| entered |eqtb| is listed under `\.{\\radical} primitive'. (Primitives consisting of a single nonalphabetic character, @!like `\.{\\/}', are listed under `Single-character primitives'.) @!@^Single-character primitives@> Meanwhile, this is a convenient place to catch up on something we were unable to do before the hash table was defined: @= print_esc(font_id_text(font(p))) @* Saving and restoring equivalents. The nested structure provided by `$\.{\char'173}\ldots\.{\char'175}$' groups in \TeX\ means that |eqtb| entries valid in outer groups should be saved and restored later if they are overridden inside the braces. When a new |eqtb| value is being assigned, the program therefore checks to see if the previous entry belongs to an outer level. In such a case, the old value is placed on the |save_stack| just before the new value enters |eqtb|. At the end of a grouping level, i.e., when the right brace is sensed, the |save_stack| is used to restore the outer values, and the inner ones are destroyed. Entries on the |save_stack| are of type |memory_word|. The top item on this stack is |save_stack[p]|, where |p==save_ptr-1|; it contains three fields called |save_type|, |save_level|, and |save_index|, and it is interpreted in one of four ways: \yskip\hangg 1) If |save_type(p)==restore_old_value|, then |save_index(p)| is a location in |eqtb| whose current value should be destroyed at the end of the current group and replaced by |save_stack[p-1]|. Furthermore if |save_index(p) >= int_base|, then |save_level(p)| should replace the corresponding entry in |xeq_level|. \yskip\hangg 2) If |save_type(p)==restore_zero|, then |save_index(p)| is a location in |eqtb| whose current value should be destroyed at the end of the current group, when it should be replaced by the current value of |eqtb[undefined_control_sequence]|. \yskip\hangg 3) If |save_type(p)==insert_token|, then |save_index(p)| is a token that should be inserted into \TeX's input when the current group ends. \yskip\hangg 4) If |save_type(p)==level_boundary|, then |save_level(p)| is a code explaining what kind of group we were previously in, and |save_index(p)| points to the level boundary word at the bottom of the entries for that group. @d save_type(X) save_stack[X].hh.b0 /*classifies a |save_stack| entry*/ @d save_level(X) save_stack[X].hh.b1 /*saved level for regions 5 and 6, or group code*/ @d save_index(X) save_stack[X].hh.rh /*|eqtb| location or token or |save_stack| location*/ @d restore_old_value 0 /*|save_type| when a value should be restored later*/ @d restore_zero 1 /*|save_type| when an undefined entry should be restored*/ @d insert_token 2 /*|save_type| when a token is being saved for later use*/ @d level_boundary 3 /*|save_type| corresponding to beginning of group*/ @ Here are the group codes that are used to discriminate between different kinds of groups. They allow \TeX\ to decide what special actions, if any, should be performed when a group ends. \def\grp{\.{\char'173...\char'175}} Some groups are not supposed to be ended by right braces. For example, the `\.\$' that begins a math formula causes a |math_shift_group| to be started, and this should be terminated by a matching `\.\$'. Similarly, a group that starts with \.{\\left} should end with \.{\\right}, and one that starts with \.{\\begingroup} should end with \.{\\endgroup}. @d bottom_level 0 /*group code for the outside world*/ @d simple_group 1 /*group code for local structure only*/ @d hbox_group 2 /*code for `\.{\\hbox}\grp'*/ @d adjusted_hbox_group 3 /*code for `\.{\\hbox}\grp' in vertical mode*/ @d vbox_group 4 /*code for `\.{\\vbox}\grp'*/ @d vtop_group 5 /*code for `\.{\\vtop}\grp'*/ @d align_group 6 /*code for `\.{\\halign}\grp', `\.{\\valign}\grp'*/ @d no_align_group 7 /*code for `\.{\\noalign}\grp'*/ @d output_group 8 /*code for output routine*/ @d math_group 9 /*code for, e.g., `\.{\char'136}\grp'*/ @d disc_group 10 /*code for `\.{\\discretionary}\grp\grp\grp'*/ @d insert_group 11 /*code for `\.{\\insert}\grp', `\.{\\vadjust}\grp'*/ @d vcenter_group 12 /*code for `\.{\\vcenter}\grp'*/ @d math_choice_group 13 /*code for `\.{\\mathchoice}\grp\grp\grp\grp'*/ @d semi_simple_group 14 /*code for `\.{\\begingroup...\\endgroup}'*/ @d math_shift_group 15 /*code for `\.{\$...\$}'*/ @d math_left_group 16 /*code for `\.{\\left...\\right}'*/ @d max_group_code 16 @= typedef uint8_t group_code; /*|save_level| for a level boundary*/ @ The global variable |cur_group| keeps track of what sort of group we are currently in. Another global variable, |cur_boundary|, points to the topmost |level_boundary| word. And |cur_level| is the current depth of nesting. The routines are designed to preserve the condition that no entry in the |save_stack| or in |eqtb| ever has a level greater than |cur_level|. @ @= memory_word @!save_stack[save_size+1]; uint16_t @!save_ptr; /*first unused entry on |save_stack|*/ uint16_t @!max_save_stack; /*maximum usage of save stack*/ quarterword @!cur_level; /*current nesting level for groups*/ group_code @!cur_group; /*current group type*/ uint16_t @!cur_boundary; /*where the current level begins*/ @ At this time it might be a good idea for the reader to review the introduction to |eqtb| that was given above just before the long lists of parameter names. Recall that the ``outer level'' of the program is |level_one|, since undefined control sequences are assumed to be ``defined'' at |level_zero|. @= save_ptr=0;cur_level=level_one;cur_group=bottom_level;cur_boundary=0; max_save_stack=0; @ The following macro is used to test if there is room for up to six more entries on |save_stack|. By making a conservative test like this, we can get by with testing for overflow in only a few places. @d check_full_save_stack if (save_ptr > max_save_stack) {@+max_save_stack=save_ptr; if (max_save_stack > save_size-6) overflow("save size", save_size); @:TeX capacity exceeded save size}{\quad save size@> } @ Procedure |new_save_level| is called when a group begins. The argument is a group identification code like `|hbox_group|'. After calling this routine, it is safe to put five more entries on |save_stack|. In some cases integer-valued items are placed onto the |save_stack| just below a |level_boundary| word, because this is a convenient place to keep information that is supposed to ``pop up'' just when the group has finished. For example, when `\.{\\hbox to 100pt}\grp' is being treated, the 100pt dimension is stored on |save_stack| just before |new_save_level| is called. We use the notation |saved(k)| to stand for an integer item that appears in location |save_ptr+k| of the save stack. @d saved(X) save_stack[save_ptr+X].i @p void new_save_level(group_code @!c) /*begin a new level of grouping*/ {@+check_full_save_stack; save_type(save_ptr)=level_boundary;save_level(save_ptr)=cur_group; save_index(save_ptr)=cur_boundary; if (cur_level==max_quarterword) overflow("grouping levels", @:TeX capacity exceeded grouping levels}{\quad grouping levels@> max_quarterword-min_quarterword); /*quit if |(cur_level+1)| is too big to be stored in |eqtb|*/ cur_boundary=save_ptr;incr(cur_level);incr(save_ptr);cur_group=c; } @ Just before an entry of |eqtb| is changed, the following procedure should be called to update the other data structures properly. It is important to keep in mind that reference counts in |mem| include references from within |save_stack|, so these counts must be handled carefully. @^reference counts@> @p void eq_destroy(memory_word @!w) /*gets ready to forget |w|*/ {@+pointer q; /*|equiv| field of |w|*/ switch (eq_type_field(w)) { case call: case long_call: case outer_call: case long_outer_call: delete_token_ref(equiv_field(w));@+break; case glue_ref: delete_glue_ref(equiv_field(w));@+break; case shape_ref: {@+q=equiv_field(w); /*we need to free a \.{\\parshape} block*/ if (q!=null) free_node(q, info(q)+info(q)+1); } @+break; /*such a block is |2 n+1| words long, where |n==info(q)|*/ case box_ref: flush_node_list(equiv_field(w));@+break; default:do_nothing; } } @ To save a value of |eqtb[p]| that was established at level |l|, we can use the following subroutine. @p void eq_save(pointer @!p, quarterword @!l) /*saves |eqtb[p]|*/ {@+check_full_save_stack; if (l==level_zero) save_type(save_ptr)=restore_zero; else{@+save_stack[save_ptr]=eqtb[p];incr(save_ptr); save_type(save_ptr)=restore_old_value; } save_level(save_ptr)=l;save_index(save_ptr)=p;incr(save_ptr); } @ The procedure |eq_define| defines an |eqtb| entry having specified |eq_type| and |equiv| fields, and saves the former value if appropriate. This procedure is used only for entries in the first four regions of |eqtb|, i.e., only for entries that have |eq_type| and |equiv| fields. After calling this routine, it is safe to put four more entries on |save_stack|, provided that there was room for four more entries before the call, since |eq_save| makes the necessary test. @p void eq_define(pointer @!p, quarterword @!t, halfword @!e) /*new data for |eqtb|*/ {@+if (eq_level(p)==cur_level) eq_destroy(eqtb[p]); else if (cur_level > level_one) eq_save(p, eq_level(p)); eq_level(p)=cur_level;eq_type(p)=t;equiv(p)=e; } @ The counterpart of |eq_define| for the remaining (fullword) positions in |eqtb| is called |eq_word_define|. Since |xeq_level[p] >= level_one| for all |p|, a `|restore_zero|' will never be used in this case. @p void eq_word_define(pointer @!p, int @!w) {@+if (xeq_level[p]!=cur_level) {@+eq_save(p, xeq_level[p]);xeq_level[p]=cur_level; } eqtb[p].i=w; } @ The |eq_define| and |eq_word_define| routines take care of local definitions. @^global definitions@> Global definitions are done in almost the same way, but there is no need to save old values, and the new value is associated with |level_one|. @p void geq_define(pointer @!p, quarterword @!t, halfword @!e) /*global |eq_define|*/ {@+eq_destroy(eqtb[p]); eq_level(p)=level_one;eq_type(p)=t;equiv(p)=e; } @# void geq_word_define(pointer @!p, int @!w) /*global |eq_word_define|*/ {@+eqtb[p].i=w;xeq_level[p]=level_one; } @ Subroutine |save_for_after| puts a token on the stack for save-keeping. @p void save_for_after(halfword @!t) {@+if (cur_level > level_one) {@+check_full_save_stack; save_type(save_ptr)=insert_token;save_level(save_ptr)=level_zero; save_index(save_ptr)=t;incr(save_ptr); } } @ The |unsave| routine goes the other way, taking items off of |save_stack|. This routine takes care of restoration when a level ends; everything belonging to the topmost group is cleared off of the save stack. @p@t\4@>@@;@/ void back_input(void); void unsave(void) /*pops the top level off the save stack*/ {@+ pointer p; /*position to be restored*/ quarterword @!l; /*saved level, if in fullword regions of |eqtb|*/ halfword @!t; /*saved value of |cur_tok|*/ if (cur_level > level_one) {@+decr(cur_level); @; } else confusion(@[@<|"curlevel"|@>@]); /*|unsave| is not used when |cur_group==bottom_level|*/ @:this can't happen curlevel}{\quad curlevel@> } @ @= loop@+{@+decr(save_ptr); if (save_type(save_ptr)==level_boundary) goto done; p=save_index(save_ptr); if (save_type(save_ptr)==insert_token) @@; else{@+if (save_type(save_ptr)==restore_old_value) {@+l=save_level(save_ptr);decr(save_ptr); } else save_stack[save_ptr]=eqtb[undefined_control_sequence]; @; } } done: cur_group=save_level(save_ptr);cur_boundary=save_index(save_ptr) @ A global definition, which sets the level to |level_one|, @^global definitions@> will not be undone by |unsave|. If at least one global definition of |eqtb[p]| has been carried out within the group that just ended, the last such definition will therefore survive. @= if (p < int_base) if (eq_level(p)==level_one) {@+eq_destroy(save_stack[save_ptr]); /*destroy the saved value*/ #ifdef @!STAT if (tracing_restores > 0) restore_trace(p,@[@<|"retaining"|@>@]); #endif @;@/ } else{@+eq_destroy(eqtb[p]); /*destroy the current value*/ eqtb[p]=save_stack[save_ptr]; /*restore the saved value*/ #ifdef @!STAT if (tracing_restores > 0) restore_trace(p,@[@<|"restoring"|@>@]); #endif @;@/ } else if (xeq_level[p]!=level_one) {@+eqtb[p]=save_stack[save_ptr];xeq_level[p]=l; #ifdef @!STAT if (tracing_restores > 0) restore_trace(p,@[@<|"restoring"|@>@]); #endif @;@/ } else{ #ifdef @!STAT if (tracing_restores > 0) restore_trace(p,@[@<|"retaining"|@>@]); #endif @;@/ } @ @= #ifdef @!STAT void restore_trace(pointer @!p, str_number @!s) /*|eqtb[p]| has just been restored or retained*/ {@+begin_diagnostic();print_char('{');print(s);print_char(' '); show_eqtb(p);print_char('}'); end_diagnostic(false); } #endif @ When looking for possible pointers to a memory location, it is helpful to look for references from |eqtb| that might be waiting on the save stack. Of course, we might find spurious pointers too; but this routine is merely an aid when debugging, and at such times we are grateful for any scraps of information, even if they prove to be irrelevant. @^dirty \PASCAL@> @= if (save_ptr > 0) for (q=0; q<=save_ptr-1; q++) {@+if (equiv_field(save_stack[q])==p) {@+print_nl("SAVE(");print_int(q);print_char(')'); } } @ Most of the parameters kept in |eqtb| can be changed freely, but there's an exception: The magnification should not be used with two different values during any \TeX\ job, since a single magnification is applied to an entire run. The global variable |mag_set| is set to the current magnification whenever it becomes necessary to ``freeze'' it at a particular value. @= int @!mag_set; /*if nonzero, this magnification should be used henceforth*/ @ @= mag_set=0; @ The |prepare_mag| subroutine is called whenever \TeX\ wants to use |mag| for magnification. @p void prepare_mag(void) {@+if ((mag_set > 0)&&(mag!=mag_set)) {@+print_err("Incompatible magnification (");print_int(mag); @.Incompatible magnification@> print_str(");");print_nl(" the previous value will be retained"); help2("I can handle only one magnification ratio per job. So I've")@/ ("reverted to the magnification you used earlier on this run.");@/ int_error(mag_set); geq_word_define(int_base+mag_code, mag_set); /*|mag=mag_set|*/ } if ((mag <= 0)||(mag > 32768)) {@+print_err("Illegal magnification has been changed to 1000");@/ @.Illegal magnification...@> help1("The magnification ratio must be between 1 and 32768."); int_error(mag);geq_word_define(int_base+mag_code, 1000); } mag_set=mag; } @* Token lists. A \TeX\ token is either a character or a control sequence, and it is @^token@> represented internally in one of two ways: (1)~A character whose ASCII code number is |c| and whose command code is |m| is represented as the number $2^8m+c$; the command code is in the range |1 <= m <= 14|. (2)~A control sequence whose |eqtb| address is |p| is represented as the number |cs_token_flag+p|. Here |cs_token_flag==@t$2^{12}-1$@>| is larger than $2^8m+c$, yet it is small enough that |cs_token_flag+p < max_halfword|; thus, a token fits comfortably in a halfword. A token |t| represents a |left_brace| command if and only if |t < left_brace_limit|; it represents a |right_brace| command if and only if we have |left_brace_limit <= t < right_brace_limit|; and it represents a |match| or |end_match| command if and only if |match_token <= t <= end_match_token|. The following definitions take care of these token-oriented constants and a few others. @d cs_token_flag 07777 /*amount added to the |eqtb| location in a token that stands for a control sequence; is a multiple of~256, less~1*/ @d left_brace_token 00400 /*$2^8\cdot|left_brace|$*/ @d left_brace_limit 01000 /*$2^8\cdot(|left_brace|+1)$*/ @d right_brace_token 01000 /*$2^8\cdot|right_brace|$*/ @d right_brace_limit 01400 /*$2^8\cdot(|right_brace|+1)$*/ @d math_shift_token 01400 /*$2^8\cdot|math_shift|$*/ @d tab_token 02000 /*$2^8\cdot|tab_mark|$*/ @d out_param_token 02400 /*$2^8\cdot|out_param|$*/ @d space_token 05040 /*$2^8\cdot|spacer|+|' '|$*/ @d letter_token 05400 /*$2^8\cdot|letter|$*/ @d other_token 06000 /*$2^8\cdot|other_char|$*/ @d match_token 06400 /*$2^8\cdot|match|$*/ @d end_match_token 07000 /*$2^8\cdot|end_match|$*/ @ @= if (cs_token_flag+undefined_control_sequence > max_halfword) bad=21; @ A token list is a singly linked list of one-word nodes in |mem|, where each word contains a token and a link. Macro definitions, output-routine definitions, marks, \.{\\write} texts, and a few other things are remembered by \TeX\ in the form of token lists, usually preceded by a node with a reference count in its |token_ref_count| field. The token stored in location |p| is called |info(p)|. Three special commands appear in the token lists of macro definitions. When |m==match|, it means that \TeX\ should scan a parameter for the current macro; when |m==end_match|, it means that parameter matching should end and \TeX\ should start reading the macro text; and when |m==out_param|, it means that \TeX\ should insert parameter number |c| into the text at this point. The enclosing \.{\char'173} and \.{\char'175} characters of a macro definition are omitted, but the final right brace of an output routine is included at the end of its token list. Here is an example macro definition that illustrates these conventions. After \TeX\ processes the text $$\.{\\def\\mac a\#1\#2 \\b \{\#1\\-a \#\#1\#2 \#2\}}$$ the definition of \.{\\mac} is represented as a token list containing $$\def\,{\hskip2pt} \vbox{\halign{\hfil#\hfil\cr (reference count), |letter|\,\.a, |match|\,\#, |match|\,\#, |spacer|\,\.\ , \.{\\b}, |end_match|,\cr |out_param|\,1, \.{\\-}, |letter|\,\.a, |spacer|\,\.\ , |mac_param|\,\#, |other_char|\,\.1,\cr |out_param|\,2, |spacer|\,\.\ , |out_param|\,2.\cr}}$$ The procedure |scan_toks| builds such token lists, and |macro_call| does the parameter matching. @^reference counts@> Examples such as $$\.{\\def\\m\{\\def\\m\{a\}\ b\}}$$ explain why reference counts would be needed even if \TeX\ had no \.{\\let} operation: When the token list for \.{\\m} is being read, the redefinition of \.{\\m} changes the |eqtb| entry before the token list has been fully consumed, so we dare not simply destroy a token list when its control sequence is being redefined. If the parameter-matching part of a definition ends with `\.{\#\{}', the corresponding token list will have `\.\{' just before the `|end_match|' and also at the very end. The first `\.\{' is used to delimit the parameter; the second one keeps the first from disappearing. @ The procedure |show_token_list|, which prints a symbolic form of the token list that starts at a given node |p|, illustrates these conventions. The token list being displayed should not begin with a reference count. However, the procedure is intended to be robust, so that if the memory links are awry or if |p| is not really a pointer to a token list, nothing catastrophic will happen. An additional parameter |q| is also given; this parameter is either null or it points to a node in the token list where a certain magic computation takes place that will be explained later. (Basically, |q| is non-null when we are printing the two-line context information at the time of an error message; |q| marks the place corresponding to where the second line should begin.) For example, if |p| points to the node containing the first \.a in the token list above, then |show_token_list| will print the string $$\hbox{`\.{a\#1\#2\ \\b\ ->\#1\\-a\ \#\#1\#2\ \#2}';}$$ and if |q| points to the node containing the second \.a, the magic computation will be performed just before the second \.a is printed. The generation will stop, and `\.{\\ETC.}' will be printed, if the length of printing exceeds a given limit~|l|. Anomalous entries are printed in the form of control sequences that are not followed by a blank space, e.g., `\.{\\BAD.}'; this cannot be confused with actual control sequences because a real control sequence named \.{BAD} would come out `\.{\\BAD\ }'. @= void show_token_list(int @!p, int @!q, int @!l) {@+ int m, @!c; /*pieces of a token*/ ASCII_code @!match_chr; /*character used in a `|match|'*/ ASCII_code @!n; /*the highest parameter number, as an ASCII digit*/ match_chr='#';n='0';tally=0; while ((p!=null)&&(tally < l)) {@+if (p==q) @; @; p=link(p); } if (p!=null) print_esc(@[@<|"ETC."|@>@]); @.ETC@> } @ @= if ((p < hi_mem_min)||(p > mem_end)) {@+print_esc(@[@<|"CLOBBERED."|@>@]);return; @.CLOBBERED@> } if (info(p) >= cs_token_flag) print_cs(info(p)-cs_token_flag); else{@+m=info(p)/0400;c=info(p)%0400; if (info(p) < 0) print_esc(@[@<|"BAD."|@>@]); @.BAD@> else@; } @ The procedure usually ``learns'' the character code used for macro parameters by seeing one in a |match| command before it runs into any |out_param| commands. @= switch (m) { case left_brace: case right_brace: case math_shift: case tab_mark: case sup_mark: case sub_mark: case spacer: case letter: case other_char: print(c);@+break; case mac_param: {@+print(c);print(c); } @+break; case out_param: {@+print(match_chr); if (c <= 9) print_char(c+'0'); else{@+print_char('!');return; } } @+break; case match: {@+match_chr=c;print(c);incr(n);print_char(n); if (n > '9') return; } @+break; case end_match: print_str("->");@+break; @.->@> default:print_esc(@[@<|"BAD."|@>@]); @.BAD@> } @ Here's the way we sometimes want to display a token list, given a pointer to its reference count; the pointer may be null. @p void token_show(pointer @!p) {@+if (p!=null) show_token_list(link(p), null, 10000000); } @ The |print_meaning| subroutine displays |cur_cmd| and |cur_chr| in symbolic form, including the expansion of a macro or mark. @p void print_meaning(void) {@+print_cmd_chr(cur_cmd, cur_chr); if (cur_cmd >= call) {@+print_char(':');print_ln();token_show(cur_chr); } else if (cur_cmd==top_bot_mark) {@+print_char(':');print_ln(); token_show(cur_mark[cur_chr]); } } @* Introduction to the syntactic routines. Let's pause a moment now and try to look at the Big Picture. The \TeX\ program consists of three main parts: syntactic routines, semantic routines, and output routines. The chief purpose of the syntactic routines is to deliver the user's input to the semantic routines, one token at a time. The semantic routines act as an interpreter responding to these tokens, which may be regarded as commands. And the output routines are periodically called on to convert box-and-glue lists into a compact set of instructions that will be sent to a typesetter. We have discussed the basic data structures and utility routines of \TeX, so we are good and ready to plunge into the real activity by considering the syntactic routines. Our current goal is to come to grips with the |get_next| procedure, which is the keystone of \TeX's input mechanism. Each call of |get_next| sets the value of three variables |cur_cmd|, |cur_chr|, and |cur_cs|, representing the next input token. $$\vbox{\halign{#\hfil\cr \hbox{|cur_cmd| denotes a command code from the long list of codes given above;}\cr \hbox{|cur_chr| denotes a character code or other modifier of the command code;}\cr \hbox{|cur_cs| is the |eqtb| location of the current control sequence,}\cr \hbox{\qquad if the current token was a control sequence, otherwise it's zero.}\cr}}$$ Underlying this external behavior of |get_next| is all the machinery necessary to convert from character files to tokens. At a given time we may be only partially finished with the reading of several files (for which \.{\\input} was specified), and partially finished with the expansion of some user-defined macros and/or some macro parameters, and partially finished with the generation of some text in a template for \.{\\halign}, and so on. When reading a character file, special characters must be classified as math delimiters, etc.; comments and extra blank spaces must be removed, paragraphs must be recognized, and control sequences must be found in the hash table. Furthermore there are occasions in which the scanning routines have looked ahead for a word like `\.{plus}' but only part of that word was found, hence a few characters must be put back into the input and scanned again. To handle these situations, which might all be present simultaneously, \TeX\ uses various stacks that hold information about the incomplete activities, and there is a finite state control for each level of the input mechanism. These stacks record the current state of an implicitly recursive process, but the |get_next| procedure is not recursive. Therefore it will not be difficult to translate these algorithms into low-level languages that do not support recursion. @= eight_bits @!cur_cmd; /*current command set by |get_next|*/ halfword @!cur_chr; /*operand of current command*/ pointer @!cur_cs; /*control sequence found here, zero if none found*/ halfword @!cur_tok; /*packed representative of |cur_cmd| and |cur_chr|*/ @ The |print_cmd_chr| routine prints a symbolic interpretation of a command code and its modifier. This is used in certain `\.{You can\'t}' error messages, and in the implementation of diagnostic routines like \.{\\show}. The body of |print_cmd_chr| is a rather tedious listing of print commands, and most of it is essentially an inverse to the |primitive| routine that enters a \TeX\ primitive into |eqtb|. Therefore much of this procedure appears elsewhere in the program, together with the corresponding |primitive| calls. @d chr_cmd(X) {@+print_str(X);print_ASCII(chr_code); } @= void print_cmd_chr(quarterword @!cmd, halfword @!chr_code) {@+switch (cmd) { case left_brace: chr_cmd("begin-group character ")@;@+break; case right_brace: chr_cmd("end-group character ")@;@+break; case math_shift: chr_cmd("math shift character ")@;@+break; case mac_param: chr_cmd("macro parameter character ")@;@+break; case sup_mark: chr_cmd("superscript character ")@;@+break; case sub_mark: chr_cmd("subscript character ")@;@+break; case endv: print_str("end of alignment template");@+break; case spacer: chr_cmd("blank space ")@;@+break; case letter: chr_cmd("the letter ")@;@+break; case other_char: chr_cmd("the character ")@;@+break; @t\4@>@@; default:print_str("[unknown command code!]"); } } @ Here is a procedure that displays the current command. @p void show_cur_cmd_chr(void) {@+begin_diagnostic();print_nl("{"); if (mode!=shown_mode) {@+print_mode(mode);print_str(": ");shown_mode=mode; } print_cmd_chr(cur_cmd, cur_chr);print_char('}'); end_diagnostic(false); } @* Input stacks and states. This implementation of \TeX\ uses two different conventions for representing sequential stacks. @^stack conventions@>@^conventions for representing stacks@> \yskip\hangg 1) If there is frequent access to the top entry, and if the stack is essentially never empty, then the top entry is kept in a global variable (even better would be a machine register), and the other entries appear in the array $\\{stack}[0\to(\\{ptr}-1)]$. For example, the semantic stack described above is handled this way, and so is the input stack that we are about to study. \yskip\hangg 2) If there is infrequent top access, the entire stack contents are in the array $\\{stack}[0\to(\\{ptr}-1)]$. For example, the |save_stack| is treated this way, as we have seen. \yskip\noindent The state of \TeX's input mechanism appears in the input stack, whose entries are records with six fields, called |state|, |index|, |start|, |loc|, |limit|, and |name|. This stack is maintained with convention~(1), so it is declared in the following way: @= typedef struct { quarterword @!state_field, @!index_field; halfword @!start_field, @!loc_field, @!limit_field, @!name_field; } in_state_record; @ @= in_state_record @!input_stack[stack_size+1]; uint8_t @!input_ptr; /*first unused location of |input_stack|*/ uint8_t @!max_in_stack; /*largest value of |input_ptr| when pushing*/ in_state_record @!cur_input; /*the ``top'' input state, according to convention (1)*/ @ We've already defined the special variable |loc====cur_input.loc_field| in our discussion of basic input-output routines. The other components of |cur_input| are defined in the same way: @d state cur_input.state_field /*current scanner state*/ @d index cur_input.index_field /*reference for buffer information*/ @d start cur_input.start_field /*starting position in |buffer|*/ @d limit cur_input.limit_field /*end of current line in |buffer|*/ @d name cur_input.name_field /*name of the current file*/ @ Let's look more closely now at the control variables (|state|,~|index|,~|start|,~|loc|,~|limit|,~|name|), assuming that \TeX\ is reading a line of characters that have been input from some file or from the user's terminal. There is an array called |buffer| that acts as a stack of all lines of characters that are currently being read from files, including all lines on subsidiary levels of the input stack that are not yet completed. \TeX\ will return to the other lines when it is finished with the present input file. (Incidentally, on a machine with byte-oriented addressing, it might be appropriate to combine |buffer| with the |str_pool| array, letting the buffer entries grow downward from the top of the string pool and checking that these two tables don't bump into each other.) The line we are currently working on begins in position |start| of the buffer; the next character we are about to read is |buffer[loc]|; and |limit| is the location of the last character present. If |loc > limit|, the line has been completely read. Usually |buffer[limit]| is the |end_line_char|, denoting the end of a line, but this is not true if the current line is an insertion that was entered on the user's terminal in response to an error message. The |name| variable is a string number that designates the name of the current file, if we are reading a text file. It is zero if we are reading from the terminal; it is |n+1| if we are reading from input stream |n|, where |0 <= n <= 16|. (Input stream 16 stands for an invalid stream number; in such cases the input is actually from the terminal, under control of the procedure |read_toks|.) The |state| variable has one of three values, when we are scanning such files: $$\baselineskip 15pt\vbox{\halign{#\hfil\cr 1) |state==mid_line| is the normal state.\cr 2) |state==skip_blanks| is like |mid_line|, but blanks are ignored.\cr 3) |state==new_line| is the state at the beginning of a line.\cr}}$$ These state values are assigned numeric codes so that if we add the state code to the next character's command code, we get distinct values. For example, `|mid_line+spacer|' stands for the case that a blank space character occurs in the middle of a line when it is not being ignored; after this case is processed, the next value of |state| will be |skip_blanks|. @d mid_line 1 /*|state| code when scanning a line of characters*/ @d skip_blanks (2+max_char_code) /*|state| code when ignoring blanks*/ @d new_line (3+max_char_code+max_char_code) /*|state| code at start of line*/ @ Additional information about the current line is available via the |index| variable, which counts how many lines of characters are present in the buffer below the current level. We have |index==0| when reading from the terminal and prompting the user for each line; then if the user types, e.g., `\.{\\input paper}', we will have |index==1| while reading the file \.{paper.tex}. However, it does not follow that |index| is the same as the input stack pointer, since many of the levels on the input stack may come from token lists. For example, the instruction `\.{\\input paper}' might occur in a token list. The global variable |in_open| is equal to the |index| value of the highest non-token-list level. Thus, the number of partially read lines in the buffer is |in_open+1|, and we have |in_open==index| when we are not reading a token list. If we are not currently reading from the terminal, or from an input stream, we are reading from the file variable |input_file[index]|. We use the notation |terminal_input| as a convenient abbreviation for |name==0|, and |cur_file| as an abbreviation for |input_file[index]|. The global variable |line| contains the line number in the topmost open file, for use in error messages. If we are not reading from the terminal, |line_stack[index]| holds the line number for the enclosing level, so that |line| can be restored when the current file has been read. Line numbers should never be negative, since the negative of the current line number is used to identify the user's output routine in the |mode_line| field of the semantic nest entries. If more information about the input state is needed, it can be included in small arrays like those shown here. For example, the current page or segment number in the input file might be put into a variable |@!page|, maintained for enclosing levels in `\ignorespaces|@!page_stack: array[1 dotdot max_in_open]int|\unskip' by analogy with |line_stack|. @^system dependencies@> @d terminal_input (name==0) /*are we reading from the terminal?*/ @d cur_file input_file[index] /*the current |alpha_file| variable*/ @= uint8_t @!in_open; /*the number of lines in the buffer, less one*/ uint8_t @!open_parens; /*the number of open text files*/ alpha_file @!input_file0[max_in_open], *const @!input_file = @!input_file0-1; int @!line; /*current line number in the current source file*/ int @!line_stack0[max_in_open], *const @!line_stack = @!line_stack0-1; @ Users of \TeX\ sometimes forget to balance left and right braces properly, and one of the ways \TeX\ tries to spot such errors is by considering an input file as broken into subfiles by control sequences that are declared to be \.{\\outer}. A variable called |scanner_status| tells \TeX\ whether or not to complain when a subfile ends. This variable has six possible values: \yskip\hang|normal|, means that a subfile can safely end here without incident. \yskip\hang|skipping|, means that a subfile can safely end here, but not a file, because we're reading past some conditional text that was not selected. \yskip\hang|defining|, means that a subfile shouldn't end now because a macro is being defined. \yskip\hang|matching|, means that a subfile shouldn't end now because a macro is being used and we are searching for the end of its arguments. \yskip\hang|aligning|, means that a subfile shouldn't end now because we are not finished with the preamble of an \.{\\halign} or \.{\\valign}. \yskip\hang|absorbing|, means that a subfile shouldn't end now because we are reading a balanced token list for \.{\\message}, \.{\\write}, etc. \yskip\noindent If the |scanner_status| is not |normal|, the variable |warning_index| points to the |eqtb| location for the relevant control sequence name to print in an error message. @d skipping 1 /*|scanner_status| when passing conditional text*/ @d defining 2 /*|scanner_status| when reading a macro definition*/ @d matching 3 /*|scanner_status| when reading macro arguments*/ @d aligning 4 /*|scanner_status| when reading an alignment preamble*/ @d absorbing 5 /*|scanner_status| when reading a balanced text*/ @= uint8_t @!scanner_status; /*can a subfile end now?*/ pointer @!warning_index; /*identifier relevant to non-|normal| scanner status*/ pointer @!def_ref; /*reference count of token list being defined*/ @ Here is a procedure that uses |scanner_status| to print a warning message when a subfile has ended, and at certain other crucial times: @= void runaway(void) {@+pointer p; /*head of runaway list*/ if (scanner_status > skipping) {@+print_nl("Runaway "); @.Runaway...@> switch (scanner_status) { case defining: {@+print_str("definition");p=def_ref; } @+break; case matching: {@+print_str("argument");p=temp_head; } @+break; case aligning: {@+print_str("preamble");p=hold_head; } @+break; case absorbing: {@+print_str("text");p=def_ref; } } /*there are no other cases*/ print_char('?');print_ln();show_token_list(link(p), null, error_line-10); } } @ However, all this discussion about input state really applies only to the case that we are inputting from a file. There is another important case, namely when we are currently getting input from a token list. In this case |state==token_list|, and the conventions about the other state variables are different: \yskip\hang|loc| is a pointer to the current node in the token list, i.e., the node that will be read next. If |loc==null|, the token list has been fully read. \yskip\hang|start| points to the first node of the token list; this node may or may not contain a reference count, depending on the type of token list involved. \yskip\hang|token_type|, which takes the place of |index| in the discussion above, is a code number that explains what kind of token list is being scanned. \yskip\hang|name| points to the |eqtb| address of the control sequence being expanded, if the current token list is a macro. \yskip\hang|param_start|, which takes the place of |limit|, tells where the parameters of the current macro begin in the |param_stack|, if the current token list is a macro. \yskip\noindent The |token_type| can take several values, depending on where the current token list came from: \yskip\hang|parameter|, if a parameter is being scanned; \hang|u_template|, if the \ part of an alignment template is being scanned; \hang|v_template|, if the \ part of an alignment template is being scanned; \hang|backed_up|, if the token list being scanned has been inserted as `to be read again'. \hang|inserted|, if the token list being scanned has been inserted as the text expansion of a \.{\\count} or similar variable; \hang|macro|, if a user-defined control sequence is being scanned; \hang|output_text|, if an \.{\\output} routine is being scanned; \hang|every_par_text|, if the text of \.{\\everypar} is being scanned; \hang|every_math_text|, if the text of \.{\\everymath} is being scanned; \hang|every_display_text|, if the text of \.{\\everydisplay} is being scanned; \hang|every_hbox_text|, if the text of \.{\\everyhbox} is being scanned; \hang|every_vbox_text|, if the text of \.{\\everyvbox} is being scanned; \hang|every_job_text|, if the text of \.{\\everyjob} is being scanned; \hang|every_cr_text|, if the text of \.{\\everycr} is being scanned; \hang|mark_text|, if the text of a \.{\\mark} is being scanned; \hang|write_text|, if the text of a \.{\\write} is being scanned. \yskip\noindent The codes for |output_text|, |every_par_text|, etc., are equal to a constant plus the corresponding codes for token list parameters |output_routine_loc|, |every_par_loc|, etc. The token list begins with a reference count if and only if |token_type >= macro|. @^reference counts@> @d token_list 0 /*|state| code when scanning a token list*/ @d token_type index /*type of current token list*/ @d param_start limit /*base of macro parameters in |param_stack|*/ @d parameter 0 /*|token_type| code for parameter*/ @d u_template 1 /*|token_type| code for \ template*/ @d v_template 2 /*|token_type| code for \ template*/ @d backed_up 3 /*|token_type| code for text to be reread*/ @d inserted 4 /*|token_type| code for inserted texts*/ @d macro 5 /*|token_type| code for defined control sequences*/ @d output_text 6 /*|token_type| code for output routines*/ @d every_par_text 7 /*|token_type| code for \.{\\everypar}*/ @d every_math_text 8 /*|token_type| code for \.{\\everymath}*/ @d every_display_text 9 /*|token_type| code for \.{\\everydisplay}*/ @d every_hbox_text 10 /*|token_type| code for \.{\\everyhbox}*/ @d every_vbox_text 11 /*|token_type| code for \.{\\everyvbox}*/ @d every_job_text 12 /*|token_type| code for \.{\\everyjob}*/ @d every_cr_text 13 /*|token_type| code for \.{\\everycr}*/ @d mark_text 14 /*|token_type| code for \.{\\topmark}, etc.*/ @d write_text 15 /*|token_type| code for \.{\\write}*/ @ The |param_stack| is an auxiliary array used to hold pointers to the token lists for parameters at the current level and subsidiary levels of input. This stack is maintained with convention (2), and it grows at a different rate from the others. @= pointer @!param_stack[param_size+1]; /*token list pointers for parameters*/ uint8_t @!param_ptr; /*first unused entry in |param_stack|*/ int @!max_param_stack; /*largest value of |param_ptr|, will be | <= param_size+9|*/ @ The input routines must also interact with the processing of \.{\\halign} and \.{\\valign}, since the appearance of tab marks and \.{\\cr} in certain places is supposed to trigger the beginning of special \ template text in the scanner. This magic is accomplished by an |align_state| variable that is increased by~1 when a `\.{\char'173}' is scanned and decreased by~1 when a `\.{\char'175}' is scanned. The |align_state| is nonzero during the \ template, after which it is set to zero; the \ template begins when a tab mark or \.{\\cr} occurs at a time that |align_state==0|. @= int @!align_state; /*group level with respect to current alignment*/ @ Thus, the ``current input state'' can be very complicated indeed; there can be many levels and each level can arise in a variety of ways. The |show_context| procedure, which is used by \TeX's error-reporting routine to print out the current input state on all levels down to the most recent line of characters from an input file, illustrates most of these conventions. The global variable |base_ptr| contains the lowest level that was displayed by this procedure. @= uint8_t @!base_ptr; /*shallowest level shown by |show_context|*/ @ The status at each level is indicated by printing two lines, where the first line indicates what was read so far and the second line shows what remains to be read. The context is cropped, if necessary, so that the first line contains at most |half_error_line| characters, and the second contains at most |error_line|. Non-current input levels whose |token_type| is `|backed_up|' are shown only if they have not been fully read. @p void show_context(void) /*prints where the scanner is*/ {@+ uint8_t old_setting; /*saved |selector| setting*/ int @!nn; /*number of contexts shown so far, less one*/ bool @!bottom_line; /*have we reached the final context to be shown?*/ @@; base_ptr=input_ptr;input_stack[base_ptr]=cur_input; /*store current state*/ nn=-1;bottom_line=false; loop@+{@+cur_input=input_stack[base_ptr]; /*enter into the context*/ if ((state!=token_list)) if ((name > 17)||(base_ptr==0)) bottom_line=true; if ((base_ptr==input_ptr)||bottom_line||(nn < error_context_lines)) @@; else if (nn==error_context_lines) {@+print_nl("...");incr(nn); /*omitted if |error_context_lines < 0|*/ } if (bottom_line) goto done; decr(base_ptr); } done: cur_input=input_stack[input_ptr]; /*restore original state*/ } @ @= {@+if ((base_ptr==input_ptr)||(state!=token_list)|| (token_type!=backed_up)||(loc!=null)) /*we omit backed-up token lists that have already been read*/ {@+tally=0; /*get ready to count characters*/ old_setting=selector; if (state!=token_list) {@+@; @; } else{@+@; @; } selector=old_setting; /*stop pseudoprinting*/ @; incr(nn); } } @ This routine should be changed, if necessary, to give the best possible indication of where the current line resides in the input file. For example, on some systems it is best to print both a page and line number. @^system dependencies@> @= if (name <= 17) if (terminal_input) if (base_ptr==0) print_nl("<*>");else print_nl(" "); else{@+print_nl(" print_char('>'); } else{@+print_nl("l.");print_int(line); } print_char(' ') @ @= switch (token_type) { case parameter: print_nl(" ");@+break; case u_template: case v_template: print_nl("