% -*- mode: Noweb; noweb-code-mode: icon-mode -*- \section{Converting {\tt noweb} markup to {\TeX} markup (Icon version)} The copyright applies both to the {\tt noweb} source and to the generated shell script. <>= # Copyright 1991 by Norman Ramsey. All rights reserved. # See file COPYRIGHT for more information. @ Here's the organization of the source: <<*>>= <> procedure rcsinfo () return "$Id: totex.nw,v 1.21 2008/10/06 01:03:05 nr Exp nr $" || "$Name: v2_12 $" end global headercomment, realwrite procedure main(args) local delay, name <> delay := !args == "-delay" noindex := !args == "-noindex" if !args == "-no-gen-comment" then &null else {realwrite := write; write := firstwrite} while inputline := read() do inputline ? { <> } write() end @ The markup carefully adds no newlines not already present in the input, so that the line numbers of the {\TeX} file will be the same as the numbers of the corresponding {\tt noweb} file. The variables are: \begin{description} \item[\tt code] Nonnull if converting a code chunk. \item[\tt quoting] Nonnull if quoting code in documentation. \item[\tt text] Number of characters written since start of documentation chunk. \end{description} [[text]] is used to write [[\nwdocspar]] if a newline appears at the beginning of a documentation chunk without any intervening text. This subtle trick preserves new-paragraph semantics without requiring the insertion of a blank line that would throw off the line count. The special control sequence makes a page break at that spot especially likely, so that small documentation chunks will appear on the same page as the code chunks they precede. <>= text := 1 @ %def text Ordering helps improve speed, so I write things in a funny order. This whole code is a giant [[if ... then ... else if ...]]. 
<>= <<@text>> <<@nl>> <> <<@defn>> <> <<@use>> <<@xref>> <<@index>> <> if ="@fatal " then { # follows last else # write(&errout, "Noweb error in stage ", tab(upto(' ')), ":", tab(0)) exit(1) } else if ="@" then warn_unknown(1(tab(upto(' ')|0), pos(0) | move(1))) else write(&errout, "Botched line in noweb pipeline: ", tab(0)) @ \subsection{Basic text and chunk boundaries} <>= if ="@begin code " then { code := 1 ; writes("\\nwbegincode{", tab(0), "}") <> } else if ="@end code " then { <> code := &null ; writes("\\nwendcode{}") lastdefnlabel := pendingprev := pendingnext := &null } else <>= if ="@begin docs " then { if \delay & match(0) then <> else {text := 0; writes("\\nwbegindocs{",tab(0),"}")} } else if ="@end docs " then { if \delay & match(0) then <> else writes("\\nwenddocs{}") } else <<@text>>= if ="@text " then { text +:= *(line := tab(0)) writes(if \quoting then <> else if \code then <> else line) lastindexref := &null } else <>= if /noindex & \lastindexref then "\\nwlinkedidentq{" || TeXliteral(line) || "}{" || lastindexref || "}" else TeXliteral(line) <>= if /noindex & \lastindexref then "\\nwlinkedidentc{" || escape(line, '{}\\') || "}{" || lastindexref || "}" else escape(line, '{}\\') <<@nl>>= if ="@nl" & pos(0) then { if /code then {<>} if \quoting then writes("\\nwnewline") <> write() } else @ Delaying markup is handled by special patterns for the first document chunk. Because several {\tt noweb} files can be marked up at once, there can be several document chunks numbered 0. The later ones are given no special treatment by the simple expedient of turning [[delay]] off after the first one. <>= &null <>= { writes("\\nwfilename{", filename, "}"); delay := &null } @ <>= if text = 0 then writes("\\nwdocspar") text := 1 @ \subsection{Chunk definitions and uses, with possible cross-reference} Here we start to see the cross-reference markup, driven by [[lastxreflabel]] and [[lastxrefref]]. 
<<@defn>>= if ="@defn " then { writes("\\sublabel{", \lastxreflabel, "}") writes("\\nwmargintag{", label2tag(\lastxreflabel), "}") writes("\\moddef{", convquotes(thischunk := tab(0)), ("~" || label2tag(\lastxrefref)) | "", "}\\", defns[thischunk], "endmoddef") useitems := \useitemstab[thischunk] pendinguses := 1 lastdefnlabel := lastxreflabel <> defns[thischunk] := "plus" } else @ [[useitemstab]] enables us to show uses even on later instances of a chunk, although the index filter only provides uses with the first chunk. <>= useitemstab := table() <<@use>>= if ="@use " then { writes("\\LA{}", convquotes(name := tab(0)), ("~" || label2tag(\lastxrefref)) | "", "\\RA{}") <> } else 
<<*>>=
# label2tag: turn a cross-reference label into the markup for its tag,
# i.e. the string {\nwtagstyle{}\subpageref{LABEL}}.
# Called for the margin tag of a definition and for the tag appended
# (after a tie) to a chunk name in definitions and uses.
procedure label2tag(label)
  return "{\\nwtagstyle{}\\subpageref{" || label || "}}"
end
@ [[defns]] serves only to give the proper distinction between [[\endmoddef]] and [[\plusendmoddef]]. <>= defns := table("") @ \subsection{Quoting, headers, trailers, \& miscellany} <>= if ="@quote" & pos(0) then { quoting := 1 ; writes("{\\Tt{}") } else if ="@endquote" & pos(0) then { quoting := &null ; writes("\\nwendquote}") } else if ="@file " then { filename := tab(0); <> \delay | writes("\\nwfilename{", filename, "}") } else if ="@line " then { } else if ="@literal " then { writes(tab(0)) } else if ="@header latex " then { <> } else if ="@header tex " then { writes("\\input nwmac ") } else if ="@trailer latex" & pos(0) then { write("\\end{document}") } else if ="@trailer tex" & pos(0) then { write("\\bye") } else <>= writes("\\documentclass{article}\\usepackage{noweb}\\pagestyle{noweb}\\noweboptions{", tab(0), "}\\begin{document}") @ \subsection{Cross-reference and index support} \subsubsection{Chunk cross-reference} We begin with basic cross-reference [[@xref label]] and [[@xref ref]], then show the chunk cross-reference that comes at the end of a code chunk. The {\LaTeX} back end ignores [[@xref nextdef]] and [[@xref prevdef]]. 
<<@xref>>= if ="@xref " then { <<@xref tests>> <> } else <>= every defitems | useitems := [] notused := &null @ By resetting the cross-reference info after dumping, we make it possible to dump both before index stuff and before end of chunk, without having to do any checking. <>= dumpitems(defitems, "nwalsodefined") dumpitems(useitems, "nwused") writes("\\nwnotused{", \notused, "}") <> <<@xref tests>>= if ="label " then { lastxreflabel := tab(0) } else if ="ref " then { lastxrefref := tab(0) } else if ="begindefs" & pos(0) then { } else if ="defitem " then { put(defitems, tab(0)) } else if ="enddefs" & pos(0) then { } else if ="beginuses" & pos(0) then { useitems := [] } else if ="useitem " then { put(useitems, tab(0)) } else if ="enduses" & pos(0) then { useitemstab[thischunk] := useitems } else if ="notused " then { notused := tab(0) } else if ="nextdef " then { pendingnext := tab(0) } else if ="prevdef " then { pendingprev := tab(0) } else <>= if \pendinguses | \pendingprev | \pendingnext then { writes("\\nwstartdeflinemarkup") <> writes("\\nwenddeflinemarkup") pendinguses := pendingprev := pendingnext := &null } <>= if \pendinguses then { dumpitems(useitems, "nwusesondefline") } if \pendingprev | \pendingnext then { writes("\\nwprevnextdefs{", \pendingprev | "\\relax", "}{", \pendingnext | "\\relax", "}") } <>= every lastxreflabel | lastxrefref := &null <>= warn_unknown("xref " || tab(upto(' \t') | 0)) 
<<*>>=
# dumpitems: write the control sequence named by cs (a backslash is
# prepended) followed by one braced argument that holds a \\{item} group
# for every element of items.
# Succeeds, returning the null value, when items is non-null and has at
# least one element; otherwise writes nothing and fails.
procedure dumpitems(items, cs)
  # \items fails on a null value, so null and empty both take the else arm
  if *\items > 0 then {
    writes("\\", cs, "{")
    every writes("\\\\{", !items, "}")
    writes("}")
    return
  } else fail
end
@ \subsubsection{Identifier cross-reference, i.e.\ index} <<@index>>= if ="@index " then { <<@index tests>> <> } else <>= warn_unknown("index " || tab(upto(' \t') | 0)) @ This first round of stuff just handles the basics: definitions, uses, and newlines. Unless it's OK to show the index (by \LA{}SI\RA), we handle nothing but \hbox{[[@index nl]]}. 
<<@index tests>>= if ="nl" & pos(0) then { write(if \code then "\\eatline" else "%")}else if =("defn "|"localdefn ") then <> { <> } else if ="use " then <> { <> } else <>= /noindex & @ Nothing is involved in handling definitions and uses unless there are cross-reference labels pending. An index definition or use has its own [[@xref label]] only if it's in documentation; if it's in code, we use the anchor label of the definition. (You don't have to know that to understand what happens here, but I thought you might like to.) <>= writes("\\nosublabel{", \lastxreflabel, "}") writes("\\nwindexdefn{\\nwixident{", TeXliteral(name := tab(0)), "}}{", indexlabel(name), "}{", \lastxrefref, "}") <> @ The {\LaTeX} back end ignores uses in code; they get bundled up by a previous filter (the cross-referencer) and handled elsewhere. <>= if /code & /quoting then { writes("\\protect\\nosublabel{", \lastxreflabel, "}") writes("\\protect\\nwindexuse{\\nwixident{", TeXliteral(name := tab(0)), "}}{", indexlabel(name), "}{", \lastxrefref, "}") } lastindexref := lastxrefref <> @ Here's the local identifier cross-reference that appears at the end of a code chunk. We guard everything with \LA{}SI\RA, as before. 
<<@index tests>>= if ="begindefs" & pos(0) then <> { <> writes("\\nwidentdefs{") } else if ="isused " then <> { "handled by latex" } else if ="defitem " then <> { i := tab(0); <> } else if ="enddefs" & pos(0) then <> { writes("}") } else if ="beginuses" & pos(0) then <> { <> writes("\\nwidentuses{"); ulist := [] } else if ="isdefined " then <> { "latex finds the definitions" } else if ="useitem " then <> { i := tab(0); <> put(ulist, i); } else if ="enduses" & pos(0) then <> { writes("}"); <> } else <>= writes("\\\\{{\\nwixident{", TeXliteral(i), "}}{", indexlabel(i), "}}") <>= every i := !ulist do writes("\\nwindexuse{\\nwixident{", TeXliteral(i), "}}{", indexlabel(i), "}{", \lastdefnlabel, "}") @ \subsubsection{The list of chunks and the index} The treatments of the list of chunks and the index are similar. Both use [[\nwixlogsorted]], which writes magic goo into the {\tt .aux} file. The real cross-referencing is done by the underlying {\LaTeX} code. <<@xref tests>>= if ="beginchunks" & pos(0) then { } else if ="chunkbegin " then { label := tab(upto(' ')); =" " writes("\\nwixlogsorted{c}{{", convquotes(tab(0)), "}{", label, "}{") } else if ="chunkuse " then { writes("\\nwixu{", tab(0), "}") } else if ="chunkdefn " then { writes("\\nwixd{", tab(0), "}") } else if ="chunkend" & pos(0) then { write("}}%") } else if ="endchunks" & pos(0) then { } else <<@index tests>>= if ="beginindex" & pos(0) then <> { } else if ="entrybegin " then <> { label := tab(upto(' ')); =" "; name := tab(0) write("\\nwixlogsorted{i}{{\\nwixident{", TeXliteral(name), "}}{", indexlabel(name), "}}%") } else if ="entryuse " then <> { "handled by latex" } else if ="entrydefn " then <> { "handled by latex" } else if ="entryend" & pos(0) then <> { } else if ="endindex" & pos(0) then <> { } else @ \subsection{Header comments} This godawful hack slips in a comment without messing up our line numbers. 
<<*>>=
# firstwrite: stand-in that main installs in place of write unless
# -no-gen-comment is given.  On its first (and only) call it puts the
# real write, saved in realwrite, back in place, appends the
# "generated automatically" TeX comment to the argument list, and
# performs the write.  The comment therefore ends up at the end of the
# first output line instead of on a line of its own.
procedure firstwrite(L[])
  write := realwrite
  put(L, "% ===> this file was generated automatically by noweave --- better not edit it")
  return write!L
end
@ \subsection{Utility procedures}
<<*>>=
# escape: return a copy of line in which every character that belongs
# to the cset chars is preceded by prefix.  prefix defaults to a single
# backslash when it is omitted (null).
procedure escape(line, chars, prefix)
  local s
  /prefix := "\\"
  line ? {
    s := ""
    # copy up to the next character to be escaped, then emit prefix + character
    while s ||:= tab(upto(chars)) do
      s ||:= prefix || move(1)
    return s || tab(0)
  }
end
<<*>>=
global TeXspecials
<>=
TeXspecials := '\\{}$&#^_ ~%'
@ I can't use [[\\char`\%]] and similar sequences with latex2e, because
[[`]] is an active character that suppresses ligatures.
So I got TeX to print out the actual character codes for me.
This string ([['\\{}$&#^_ ~%']]) should serve as a test.
Unfortunately, the character codes for these characters are not the same
across fonts. While this is not a problem when everything is in a
typewriter font, it can cause major strangenesses when another font is
used (Icon, for example, is often typeset in a sans-serif font).
Codes for `[[$]]', `[[&]]', `[[#]]', `[[^]]', `[[_]]', `[[~]]' and
`[[%]]' use font-independent macros. Since there is no font-independent
way to typeset `[[\]]', `[[{]]' and `[[}]]',\footnote{Although both
plain \TeX\ and \LaTeX\ provide `[[\{]]' and `[[\}]]', plain \TeX\ only
allows their use in math mode.} we generate special macros defined in
\verb"noweb.sty" and \verb"nwmac.tex". The default definitions are
geared to typewriter fonts, but they can be redefined as necessary.
<<*>>=
# TeXliteral: return arg with every character of the global cset
# TeXspecials replaced by TeX markup.  A character that has an entry in
# the code table becomes {\ENTRY}; any other special (the blank is the
# only one without an entry) becomes a backslash followed by the
# character itself.
procedure TeXliteral(arg)
  local codes, c, n, s
  static code
  initial {
    # flat list of (character, control-sequence name) pairs
    codes := ["\\", "nwbackslash", "{", "nwlbrace", "}", "nwrbrace",
              "$", "$", "&", "&", "#", "#", "^", "char94",
              "_", "_", "%", "%", "~", "char126"]
    code := table()
    while (c := get(codes), n := get(codes)) do code[c] := string(n)
    # sanity check: every special except the blank must have a table entry
    if c := !TeXspecials & c ~== " " & not member(code, c) then
      fatal("internal error, character-code mismatch, report a bug!")
  }
  s := ""
  arg ? {
    while s ||:= tab(upto(TeXspecials)) do {
      c := move(1)
      if member(code, c) then
        s ||:= "{\\" || code[c] || "}"
      else
        s ||:= "\\" || c
    }
    return s || tab(0)
  }
end
@ A special function is used to implement {\tt noweb}'s quoting
convention within chunk names.
<<*>>=
# convquotes: convert the double-bracket code quotations inside chunk
# name s.  Text outside a quotation is copied unchanged; each quotation
# is emitted as \code{} + TeXliteral(quoted text) + \edoc{}.  Right
# brackets in excess of the closing pair are copied through just before
# \edoc{}.  Calls fatal if an opening or closing pair cannot be matched.
procedure convquotes(s)
  local r
  r := ""
  s ? {
    while r ||:= tab(find("[[")) do {
      ="[[" | fatal("impossible missing [[")
      r ||:= "\\code{}" || TeXliteral(tab(find("]]")))
      # keep any run of extra right brackets, leaving the last two to close the quote
      r ||:= tab(many(']')-2)
      ="]]" | fatal("impossible missing ]]")
      r ||:= "\\edoc{}"
    }
    return r || tab(0)
  }
end
<<*>>=
# warn_unknown: write a warning to &errout the first time a given tag is
# seen; later calls with the same tag are silent.  The tab(0) in the
# message takes the remainder of the subject that the caller is
# currently scanning.  Always returns the null value.
procedure warn_unknown(tag)
  static warned
  initial warned := set()
  if not member(warned, tag) then {
    write(&errout, "Warning: unrecognized escape @", tag, " ", tab(0))
    insert(warned, tag)
  }
  return
end
@ This gets special characters out of the labels used by identifiers.
<<*>>=
# indexlabel: return ident with every character that is a key of the
# trans table replaced by a colon followed by that character's entry
# (for example an underscore becomes :un).  Other characters are copied
# unchanged.
procedure indexlabel(ident)
  local s
  static badset, trans
  initial {
    <>
    # badset is the cset of all characters that have a translation
    badset := ''
    every badset ++:= key(trans)
  }
  ident ? {
    s := ""
    while s ||:= tab(upto(badset)) do s ||:= ":" || trans[move(1)]
    return s || tab(0)
  }
end
<>=
trans := table()
trans[" "]  := "sp"    # space
trans["#"]  := "has"   # hash
trans["$"]  := "do"    # dollar
trans["%"]  := "pe"    # percent
trans["&"]  := "am"    # ampersand
trans[","]  := "com"   # comma
trans[":"]  := "col"   # colon
trans["\\"] := "bs"    # backslash
trans["^"]  := "hat"   # hat
trans["_"]  := "un"    # underscore
trans["{"]  := "lb"    # left brace
trans["}"]  := "rb"    # right brace
trans["~"]  := "ti"    # tilde
<<*>>=
# fatal: write the arguments to &errout, prefixed with this tool's name,
# then terminate the program with exit status 1.
procedure fatal(L[])
  write!([&errout, "noweb error in totex: "] ||| L)
  exit(1)
end