% -*- mode: Noweb; noweb-code-mode: icon-mode -*- \documentstyle[noweb]{article} \pagestyle{noweb} \begin{document} @ \section{Converting {\tt noweb} markup to {\tt HTML}} This copyright applies both to the {\tt noweb} source and to the generated code. Thanks to Bill Trost for getting me started with an early version. <>= # Copyright 1994 by Norman Ramsey. All rights reserved. # See file COPYRIGHT for more information. @ The [[-raw]] option brackets HTML with [[\begin{rawhtml}]] and [[\end{rawhtml}]]; the purpose is to embed HTML in a {\LaTeX} document before converting the document with {\tt latex2html}. [[braw]] and [[eraw]] hold those delimiters (or else empty strings). <<*>>= <> # Don't try to understand this file! Look at icon/tohtml.nw in the noweb source! global braw, eraw procedure main(args) local delay, raw, where, localindex, noindex, shortxref <> every braw | eraw := "" delay := !args == "-delay" noindex := !args == "-noindex" shortxref := if !args == "-longxref" then &null else 1 localindex := (/noindex, !args == "-localindex") # produce local identifier xref? raw := !args == "-raw" if \raw then {braw := "\\begin{rawhtml}"; eraw := "\\end{rawhtml}"} if !args == "-no-gen-comment" then &null else write(braw, "", eraw) while inputline := read() do inputline ? { <> } write() end <>= <<@text>> <<@nl>> <> <<@defn>> <> <<@use>> <<@xref>> <<@index>> <> if match("@fatal ") then { # follows last else exit(1) } else if ="@" then warn_unknown(1(tab(upto(' ')|0), pos(0) | move(1))) else fatal("Botched line in noweb pipeline: ", tab(0)) @ [[ecode]] is the marker used at the end of the current code chunk. If there is no cross-reference stuff at the end, we just use [[]]; otherwise we terminate whatever environment is used for the cross-reference stuff. <>= if ="@begin code " then { code := 1 ; thischunk := &null <> writes(braw, "
"); ecode := "
" } else if ="@end code " then { <> code := nil ; previscode := 1 writes(ecode, eraw) } else @ We maintain [[thischunk]] null until we've seen [[@defn]]. We want to try to avoid emitting paragraph elements when the preceding chunk is a code chunk, as tracked by [[previscode]]. Also, if we do slip in a paragraph, we may use the {\LaTeX} style. <>= if ="@begin docs " then { if \previscode then writes(if /raw then "

" else "\\par") previscode := &null text := 0 } else @ Sometimes it happens that a document-chunk anchor is put in a document chunk that contains no text. In that case, we put in a phony anchor at the end of the chunk so we won't lose the cross-reference. <>= if ="@end docs " then { write(linklabel(\lastxreflabel, "*")) lastxreflabel := &null } else @ Normally, if there's a pending anchor, we put it on the first available text line. There's a bit of a fine point that crops up if the very first piece of text is quoted code. In that case we have to attach both the label for the {\it docs} anchor and the ref for the {\it index} anchor. <<@text>>= if ="@text " then { text +:= *(line := tab(0)) if \code then writes((<>) | escapeSpecials(line)) else if \quoting then writes((<>) | escapeSpecials(line)) else writes((<>) | line) } else <>= 2(line ? {tab(many(' \t')); not pos(0)}, linkto(\lastindexref, escapeSpecials(line)), lastindexref := &null) @ We anchor on the first nonblank character of the line, unless that's an SGML tag, in which case we have to skip past. None of this crap would be necessary if HTML could anchor to empty text. <>= 1(line ? skip_white_tags() || linklabel(\lastxreflabel, skip_chars()) || tab(0), insert(defns_above, \lastxreflabel), lastxreflabel := &null) <>= 2(line ? {tab(many(' \t')); not pos(0)}, line ? linklabel(\lastxreflabel, (tab(many(' \t')) | "") || (skip_tags_and_char() | "*")) || tab(0), insert(defns_above, \lastxreflabel), lastxreflabel := &null) @ An indexed identifier is not empty and contains no tags, so we don't have to worry. <>= 2(line ? {tab(many(' \t')); not pos(0)}, linklabelto(lastxreflabel, lastindexref, escapeSpecials(line), "bare"), lastxreflabel := lastindexref := &null) @ Skip as many tags as possible. Argument is number of tags already skipped; if nonzero, we're willing to succeed at the end of the string. 
<<*>>=
# Match optional leading white space followed by as many complete tags
# (opening tags, closing tags, or "!--" comment openers) as can be found,
# each optionally followed by more white space.  Produces the matched text;
# matches just the white space (possibly empty) when no tag follows.
procedure skip_white_tags()
  suspend thewhite() ||
          ((="<" || thewhite() || (="/" || thewhite() | "") ||
            (tab(many(&letters)) | ="!--") ||
            tab(upto(">")) || =">" || skip_white_tags() | "") \ 1)
end
<<*>>=
# Match the run of text that a label anchor is wrapped around:
#   - plain text up to (not including) the next '<', or
#   - a character entity "&...;", or
#   - the rest of the subject when it contains no '<'.
# Fails at the end of the subject.  Also fails, after writing a diagnostic
# to &errout, when positioned at a tag or at a blank.
procedure skip_chars()
  if not any('<') & upto('<') then
    suspend tab(upto('<'))
  else if any('&') & upto(';') then
    suspend (tab(upto(';')) || =";")
  else if (any('<') & upto('>')) | any(' \t') then
    (write(&errout, "broken skip_chars, report to nr@eecs.harvard.edu: ",
           image(tab(0))), &fail)
  else if any('<') then
    (write(&errout, "warning: can't find end of HTML tag: ", image(tab(0)),
           "\n\tquoted code embedded in comment?"), &fail)
  else if pos(0) then
    fail
  else
    suspend tab(0)
end

# Match a (possibly empty) run of blanks and tabs.
procedure optwhite()
  suspend tab(many(' \t')) | ""
end

# Like optwhite, but produces at most one result.
procedure thewhite()
  return optwhite() \ 1
end
<<*>>=
# Match any leading tags and then one following character (or one entity).
# An <a ...> element is matched whole, through its closing </a>.
# count is the number of tags already skipped (default 0); when it is
# nonzero the procedure is willing to succeed at the end of the subject.
procedure skip_tags_and_char(count)
  local tag
  /count := 0
  if any('&') & upto(';') then
    suspend(tab(upto(';')) || =";")
  else if any('<') then
    suspend ="<" || (tab(many(' \t')) | "") ||
                    (="/" | (tag := tab(many(&letters)))) ||
                    tab(upto(">")) || =">" ||
                    # tag stays null for a closing tag such as </b>; \tag makes
                    # the test fail quietly instead of handing null to map()
                    (if map(\tag) == "a" then
                       tab(upto("<")) || ="<" || optwhite() || ="/" || optwhite() ||
                       (("A"|"a") == tab(many(&letters))) || optwhite() || =">"
                     else "") ||
             skip_tags_and_char(count+1)
  else
    suspend (tab(many(' \t')) | "") ||
            ((move(1) | if count > 0 then "")\1) # succeed at end if count > 0
end
@ The preceding scheme sometimes wraps an anchor around a tag.
If that turns out to be bad, we could try the following alternative:
<>=
2(line ? {tab(many(' \t')); not pos(0)}, line ?
scan_initial_tags() ||
    linklabel(\lastxreflabel, (tab(many(' \t')) | "") || (scan_past_char() | "*")) ||
    tab(0),
  insert(defns_above, \lastxreflabel),
  lastxreflabel := &null)
<>=
# Match a run of leading tags, stopping before any <a ...> tag.
# Matches the empty string when no such tag is present.
procedure scan_initial_tags()
  local tag
  suspend (="<" || (tab(many(' \t')) | "") ||
           (="/" | (tag := tab(many(&letters)), map(tag) ~== "a", tag)) ||
           tab(upto(">")) || =">" || scan_initial_tags()) | ""
end

# Match one entity "&...;", or else optional white space followed by a
# single character that is not '<'.
procedure scan_past_char()
  if any('&') & upto(';') then
    suspend(tab(upto(';')) || =";")
  else
    suspend (tab(many(' \t')) | "") || tab(any(~'<'))
end
<<@nl>>=
if ="@nl" & pos(0) then {
  <>
  write()
} else
<<@defn>>=
if ="@defn " then {
  writechunk(lastxreflabel, lastxrefref, "dfn",
             thischunk := tab(0), defns[thischunk] || "=")
  insert(defns_above, \lastxreflabel)
  useitems := \useitemstab[thischunk]
  pendinguses := 1
  <>
  defns[thischunk] := "+"
} else
<>=
useitemstab := table()
defns := table("")
defns_above := set() # keep track of defining chunks we've seen
<<@use>>=
if ="@use " then {
  writechunk(lastxreflabel, lastxrefref, "i", tab(0))
  <>
} else
@ Writing a chunk involves creating an anchor for it.
The anchor is ``bare'' because it's always in an HTML-containing place.
<<*>>=
# Write (with no trailing newline) the chunk name inside a bare anchor.
#   label, ref - anchor name and link target, passed on to linklabelto
#   tag        - element the name is wrapped in (callers pass "dfn" or "i")
#   name       - chunk name; [[...]] quoting is converted by convquotes
#   suffix     - text appended after the closing bracket (default "")
procedure writechunk(label, ref, tag, name, suffix)
  /suffix := ""
  # The angle brackets around the name are text, not markup, so they have
  # to go out as character entities.
  writes(linklabelto(label, ref,
                     sgmlwrap(tag, "&lt;" || convquotes(name) || "&gt;" || suffix),
                     "bare"))
  return
end
@
<>=
if ="@quote" & pos(0) then { quoting := 1 ; writes(braw, "<code>") }
else if ="@endquote" & pos(0) then { quoting := &null ; writes("</code>", eraw) }
else
<>=
if ="@file " then { filename := tab(0); <> }
else if ="@literal " then { writes(tab(0)) }
else if ="@header html " then { <> }
else if ="@trailer html" & pos(0) then { <> }
else
@
<>=
# NOTE(review): the empty literals here and in the trailer chunk look like
# they have lost their HTML markup -- confirm against the upstream source.
writes("", tab(0), "")
<>=
write("")
@
<<@xref>>=
if ="@xref " then {
  if fun := tab(upto(' ')) then {move(1); arg := tab(0)}
  else { fun := tab(0); arg := &null }
  case fun of {
    <>
    default : (\arg|"") ?
warn_unknown("xref " || fun) } } else @ [[useitemstab]] seems to exist to enable us to repeat the uses for each chunk, even though the [[noidx]] filter only provides them once. I think it may prove better to provide use information for each instance of a chunk. <>= "label" : { <>; lastxreflabel := arg } "ref" : { <>; lastxrefref := arg } "prevdef" : pendingprev := arg "nextdef" : pendingnext := arg "beginuses" : useitems := [] "useitem" : put(useitems, arg) "enduses" : useitemstab[\thischunk] := useitems "notused" : notused := arg <>= every lastxreflabel | lastxrefref := &null <>= warn_unused_xref("label", \lastxreflabel) <>= warn_unused_xref("ref", \lastxrefref) <<*>>= procedure warn_unused_xref(tag, label) static warned initial warned := set() if not member(warned, tag) then { insert(warned, tag) write(&errout, "Warning: internal inconsistency in noweb (not urgent)---") write(&errout, "\tnever used @xref ", tag, " ", label) } return end @ <>= if \shortxref & (\pendinguses | \pendingprev | \pendingnext | \notused) then { <> pendinguses := pendingprev := pendingnext := notused := &null } <>= if \pendinguses & *\useitems > 0 then { writes(" (") every i := 1 to *\useitems do { usedir := if member(defns_above, useitems[i]) then "<-U" else "U->" writes(if i > 1 then " " else "", linkto(useitems[i], usedir)) } writes(")") } if \pendingprev | \pendingnext then { writes(" [") writes(linkto(\pendingprev, "<-")) writes("D") writes(linkto(\pendingnext, "->")) writes("]") } <>= if /shortxref then { useitems := useitemstab[\thischunk] if \pendingprev | \pendingnext | (\pendinguses, (*\useitems > 0 | \notused)) then { <> <> useitemstab[\thischunk] if *\useitems > 0 & (\pendingprev | \pendingnext) then writes("; ") p := if *\useitems > 0 then "previous" else "Previous" n := if *\useitems > 0 then "next" else "Next" if \pendingprev then if \pendingnext then writes(linkto(pendingprev, p), " and ", linkto(pendingnext, "next"), " definitions") else writes(linkto(pendingprev, p), " 
definition") else if \pendingnext then writes(linkto(pendingnext, n), " definition") if \pendingprev | \pendingnext | *\useitems > 0 then writes(".") if \notused then writes("
\nThis code is written to a file (or else not used).") write("

") pendinguses := pendingprev := pendingnext := useitems := notused := &null } } else &null <>= useprefix := "Used " every i := 1 to *\useitems do { usedir := if member(defns_above, useitems[i]) then "above" else "below" usesuffix := if *\useitems > 1 then " (" || i || ")" else "" writes(useprefix, linkto(useitems[i], usedir || usesuffix)) useprefix := ", " } <>= useitems := [] notused := &null @ The hack here is to put the supplementary information in a blockquote area after the code. <>= if ecode == "" then { writes("

") ecode := "
" } @ The HTML back end ignores [[@xref begindefs]], [[@xref defitem]], and [[@xref enddefs]]; it uses the [[nextdef]] and [[prevdef]] links instead. <>= "begindefs" | "defitem" | "enddefs" : &null @ <>= "beginchunks" : { write(braw, "
    ") } "chunkbegin" : { writes("
  • "); comma := ": "; count := 0 arg ? { ref := tab(upto(' ')); =" "; name := tab(0) } writechunk(&null, ref, "i", name) } "chunkuse" : { writes(comma, linkto(arg, "U" || (count +:= 1))); comma := ", " } "chunkdefn" : { writes(comma, linkto(arg, "D" || (count +:= 1))); comma := ", " } "chunkend" : write() "endchunks" : write("
", eraw) <>= "beginindex" : { write(braw, "
    ") } "entrybegin" : { writes("
  • "); comma := ": "; count := 0 arg ? { ref := tab(upto(' ')); =" "; name := tab(0) } writes(linklabelto("NWI-" || escapeSpecials(name), ref, name, "b")) } "entryuse" : { writes(comma, linkto(arg, "U" || (count +:= 1))); comma := ", " } "entrydefn" : { writes(comma, linkto(arg, "D" || (count +:= 1))); comma := ", " } "entryend" : write() "endindex" : write("
", eraw) <<@index>>= if ="@index " then { if /noindex then { if fun := tab(upto(' ')) then {move(1); arg := tab(0)} else { fun := tab(0); arg := &null } case fun of { <> default : (\arg|"") ? warn_unknown("index " || fun) } } # don't get any warnings if not doing indexing } else @ The local identifier cross-reference doesn't show each use; it just shows the identifiers that are defined, with links to the full index. <>= "use" : { lastindexref := lastxrefref; lastxrefref := &null } "defn" : { <> } "localdefn" : { <> } "nl" : &null # do nothing -- no hope of keeping line numbering @ I tried an abbreviated index format at the end of the chunk, but it looks so bad that I've punted on it (for now). Hence the [[\uglyindex]]. <>= "begindefs" : if \localindex then { <> if /uglyindex | /shortxref then { <> writes("Defines ") } else { writes("
[") } comma := "" } "isused" : &null "defitem" : if \localindex then { writes(comma, linkto("NWI-" || escapeSpecials(arg), sgmlwrap("code", escapeSpecials(arg)))) comma := if /uglyindex | /shortxref then ", " else ", " } "enddefs" : if \localindex then write(if /uglyindex | /shortxref then " (links are to index).

" else "]")
"beginuses" | "isdefined" | "useitem" | "enduses" : &null # use local links
@ \subsection{Support functions}
Here's all our anchor support goo.
Unless the anchor is deemed ``bare,'' the beginning and ending tags are
protected with [[braw]] and [[eraw]].
This step is necessary to make \verb=noweave -latex+html= work without
trapping {\LaTeX} markup in a \texttt{rawhtml} environment.
<<*>>=
# Return contents wrapped in an HTML anchor.
#   label    - if non-null, becomes the anchor's name attribute
#   ref      - if non-null, the anchor links to "#" || ref
#   contents - text placed inside the anchor
#   bare     - if null, the opening and closing tags are each bracketed
#              with braw/eraw; if non-null they are emitted as they are
# When label and ref are both null no anchor tags are produced.
procedure linklabelto(label, ref, contents, bare)
  local s
  s := if \bare then "" else braw
  s ||:= if \label | \ref then "<a" else ""
  s ||:= " name=" || image(\label)         # fails, leaving s alone, if no label
  s ||:= " href=" || image("#" || \ref)    # likewise if no ref
  s ||:= if \label | \ref then ">" else ""
  s ||:= if \bare then "" else eraw
  s ||:= contents
  s ||:= if \bare then "" else braw
  s ||:= if \label | \ref then "</a>" else ""
  s ||:= if \bare then "" else eraw
  return s
end
@ Here are two abbreviations.
<<*>>=
# Anchor contents under the name label, with no link target.
procedure linklabel(label, contents, bare)
  return linklabelto(label, &null, contents, bare)
end

# Link contents to ref; the result is always ``bare''.
procedure linkto(ref, contents)
  return linklabelto(&null, ref, contents, "bare")
end
@ [[linkto]] is always used ``bare,'' and I've decided that the
inconsistency in calling conventions is worth the savings from not
having to write a meaningless argument everywhere.
Reasonable people could disagree.
@ Another support function is used for wrapping tags around text:
<<*>>=
# Return s enclosed in an opening and a closing tag named tag.
procedure sgmlwrap(tag, s)
  return "<" || tag || ">" || s || "</" || tag || ">"
end
@ Lucky for us, {\tt HTML} has few special characters.
Unlucky for us, we have to deal with each one separately.
Nothing much to whine about, really.
<<*>>=
# Return l with each HTML special character (&, <, >, ") replaced by its
# character entity.
procedure escapeSpecials(l)
  local s
  static escapes, specials
  initial {
    escapes := table()
    escapes["&"] := "&amp;"
    escapes["<"] := "&lt;"
    escapes[">"] := "&gt;"
    escapes["\""] := "&quot;"
    specials := ''
    every specials ++:= key(escapes)       # cset of every character we escape
  }
  s := ""
  l ? {
    while s ||:= tab(upto(specials)) do
      s ||:= escapes[move(1)]
    return s || tab(0)
  }
end
@ A special function is used to implement {\tt noweb}'s quoting
convention within chunk names.
<<*>>=
# Return chunk name s with each [[...]] span converted to <code>...</code>.
# Calls fatal if a closing ]] is missing.
procedure convquotes(s)
  local r
  r := ""
  s ? { # don't call escapeSpecials on s; it destroys internal markup
    while r ||:= tab(find("[[")) do {
      ="[[" | fatal("impossible missing [[")
      r ||:= "<code>" || tab(find("]]"))
      r ||:= tab(many(']')-2)   # extra ]'s before the final ]] are quoted text
      ="]]" | fatal("impossible missing ]] in ", image(s))
      r ||:= "</code>"
    }
    return r || tab(0)
  }
end
@
<<*>>=
# Warn on &errout, once per distinct tag, about an unrecognized markup
# keyword.  The rest of the current scanning subject is included in the
# message, so this is meant to be called from inside a scanning expression.
procedure warn_unknown(tag)
  static warned
  initial warned := set()
  if not member(warned, tag) then {
    write(&errout, "Warning: unrecognized escape @", tag, " ", tab(0))
    insert(warned, tag)
  }
  return
end
@
<<*>>=
# Write the arguments to &errout after a fixed prefix, then exit with status 1.
procedure fatal(L[])
  write!([&errout, "noweb error in tohtml: "] ||| L)
  exit(1)
end
@
\end{document}
<<*>>=
# Return the RCS identification strings for this file.
procedure rcsinfo ()
  return "$Id: tohtml.nw,v 1.22 2008/10/06 01:03:05 nr Exp nr $" ||
         "$Name: v2_12 $"
end
@