;
; This file contains conversion functions for translating a text file in troff
; -me format to an equivalent file in html format.  These functions are
; generally not used within emacs.  Rather, their normal usage is via the UNIX
; shell script defined in ~gfisher/bin/me2html, q.v.  (This script is also
; defined as an alias in ~gfisher/.cshrc).  See also the shell script in
; ~gfisher/bin/me2html1, which me2html calls.
;
; Here is the general strategy for how the emacs conversion functions are
; used:
;
;    * An editable base file is maintained in troff -me format, with a .me
;      extension.  A corresponding html file is mechanically generated from the
;      .me file.
;
;    * In order for the conversion to operate correctly, the auxiliary troff
;      macros defined in ~gfisher/nroff/stdhdr.me must be used.  Specifically,
;      the troff file should begin with the following initial command sequence:
;
;          .sz 11
;          .ds ~ /usersgfisher/nroff
;          .so \*~/stdhdr.me
;
;      (This sets the font to 11 point.  For a different font size, change the
;      argument of the .sz command accordingly).
;
;    * While the format of the generated html file is reasonably readable, it
;      is not intended for direct editing.  Rather, editing should be done to
;      the .me file and the conversion rerun.
;
;    * To embed html commands in a .me file, the html commands are guarded with
;      a troff conditional of the form ".if \nh".  By convention, the troff
;      register "h" is used to signify html processing, and hence the -rh
;      command-line switch is given to troff to enable html commands (see
;      below).
;
;    * WARNING: To embed html commands, the "<" of the command sequence MUST
;      be the first char on the line, and the html command must be the only
;      thing on the line.  This restriction exists in order that non-html
;      usages of "<", ">", "&" can be converted to the html entities &lt;,
;      &gt;, and &amp;, resp.
;
;    * To create a .html file from a .me file, two passes are necessary.  The
;      first pass uses the me2html function in this file in batch mode.  This
;      pass adds html commands that either replace or augment the equivalent
;      .me commands (see me2html defun code for further details).  The second
;      pass runs nroff on the result of the 1st pass converter, putting the
;      result in a pure html file.  Both passes can be run in a single UNIX
;      command stream, as follows:
;
;          me2html1 $1 | gtbl | groff -rh1 -me -Tascii | col -b > $1.html
;
;      This command stream constitutes the definition of the script in
;      ~gfisher/bin/me2html.  me2html1 (q.v.) is the lower-level CShell script
;      that runs this .el file in emacs batch mode.  Note the "-rh1" arg to
;      groff, which turns on formatting for html (no headers, no hyphenation,
;      etc.).  Note also that me2html1 is a low-level script that is not
;      intended to be used directly at the top-level UNIX shell.
;
;    * As an alternative to explicit insertion of troff conditionals, a base
;      file with an extension .meh can be maintained as the progenitor of pure
;      .me and .html files.  A .meh file contains both -me commands and
;      unguarded html title and anchor commands.
;
;    * To run a .meh file through a normal groff command stream, an additional
;      filter is placed at the very beginning of the command stream.  E.g., with
;      a plain .me file, the following is used to generate PostScript:
;
;          gtbl \!:1 | groff -me > \!:1:r.ps
;
;      whereas to handle a .meh file, the unhtml filter is added as follows
;
;          unhtml \!:1 | gtbl | groff -me > \!:1:r.ps
;
;      In current practice, .meh files are rarely used.  Instead, html title
;      and anchor commands are guarded with explicit troff conditionals.  In
;      particular, there is now a troff macro file called html.me that contains
;      some handy macros that work conditionally depending on if a file is
;      being processed for html or troff.
;
; For a largish example of a .me file with embedded html, see
; ~gfisher/src/rsl/doc/ref-man/ref-man.me.  The companion .html file in the
; same directory was created using me2html.
;
;
(defun dot ()
  "Return the current buffer position, i.e. the value of `point'."
  (point))

(defun me2html-replace-all (regexp replacement)
  "Replace every match of REGEXP in the whole buffer with REPLACEMENT.
The search starts at the beginning of the buffer and is case-sensitive.
REPLACEMENT is handed to `replace-match' non-literally, so group
back-references in it are honoured."
  (let ((case-fold-search nil))
    (goto-char (point-min))
    (while (re-search-forward regexp nil t)
      (replace-match replacement t nil))))

(defun me2html-wrap-line (before after)
  "Insert BEFORE ahead of the current line and AFTER behind it.
Point is left just after AFTER."
  (beginning-of-line)
  (insert before)
  (forward-line 1)
  ; A last line without a trailing newline leaves point at its end, not at
  ; the start of a new line; terminate it so AFTER starts on its own line.
  (unless (bolp)
    (insert "\n"))
  (insert after))

(defun me2html ()
  "Convert -me formatting commands to their html equivalents.
The passes run in this order: html escape characters, line-oriented
request rewrites, font changes, spacing, pictures, sections, lists,
centering and horizontal rules.  Finally the buffer is echoed with
`echo-file'.  Searching is made case-sensitive for the buffer, because
requests such as .(l and .(L are distinct."
  (interactive)
  (setq case-fold-search nil)
  ; Pass the name as an argument so a \"%\" in it is not read as a format spec.
  (message "%s" (buffer-name))
  (message "%d" (buffer-size))
  (goto-char (point-min))
  (convert-html-escape-chars)
  ; Each rule is (REGEXP . REPLACEMENT).  The leading dot of every request
  ; is escaped so it matches a literal \".\" only.  Order matters: the
  ; \".(l F\" forms must be rewritten before the plain \".(l\" forms.
  (dolist (rule '(("^\\.nf" . ".br\n<pre>\n.nf")
                  ("^\\.fi" . ".br\n</pre>\n.fi")
                  ("^\\.(i" . ".br\n<blockquote>\n.br")
                  ("^\\.)i" . ".br\n</blockquote>\n.br")
                  ("^\\.(l F" . ".br\n<blockquote>\n.br")
                  ("^\\.)l F" . ".br\n</blockquote>\n.br")
                  ; NOTE: .na is now global, so no explicit .ad or na
                  ; commands appear in conversions of .(n, (l, and .(t.
                  ("^\\.(n" . ".nf\n<blockquote><pre>\n.br")
                  ("^\\.)n" . ".fi\n</pre></blockquote>\n.br")
                  ("^\\.(l" . ".nf\n<blockquote><pre>")
                  ("^\\.)l" . ".fi\n</pre></blockquote>")
                  ; Former alternative for .(t: "<blockquote><pre><tt>"
                  ("^\\.(t" . ".nf\n<blockquote><pre>\n.br")
                  ; Former alternative for .)t: ".br\n</tt></pre></blockquote>"
                  ("^\\.)t" . ".fi\n</pre></blockquote>\n.br")
                  ("^\\.bp" . ".br\n<pre>\n\n</pre>\n.br")
                  ("^\\.TS" . ".br\n<pre>\n.TS")
                  ("^\\.TE" . ".br\n.TE\n</pre>\n.br")
                  ("^\\.pp" . ".br\n<p>\n.br")
                  ("^\\.lp" . ".br\n<p>\n.br")))
    (me2html-replace-all (car rule) (cdr rule)))
  ; Each entry is (ME-FONT OPEN-TAG CLOSE-TAG) for `convert-font'.  For the
  ; two-tag fonts the closing tags are listed innermost first so the
  ; generated html nests properly.
  (dolist (font '(("B" "strong" "/strong")
                  ("I" "em" "/em")
                  ("C" "tt" "/tt")
                  ("(CI" "em><tt" "/tt></em")
                  ("(CB" "strong><tt" "/tt></strong")
                  ("H" "tt" "/tt")
                  ("(HI" "em><tt" "/tt></em")
                  ("(HB" "strong><tt" "/tt></strong")))
    (apply 'convert-font font))
  ; Next line slightly preempts convert-sp.
  (me2html-replace-all "^\\.sp\n" ".br\n<pre></pre>\n.br\n")
  (goto-char (point-min))
  (convert-sp)
  (goto-char (point-min))
  (convert-pspic)
  (goto-char (point-min))
  (convert-sections)
  (goto-char (point-min))
  (convert-lists)
  (goto-char (point-min))
  (convert-centering)
  (me2html-replace-all "^\\.hl" ".br\n<hr>\n.br")
  (goto-char (point-min))
  (echo-file))

(defun unhtml ()
  "Run `unhtitle' and `unanchor', then echo the result with `echo-file'.
NOTE(review): `unanchor' is not defined in this part of the file;
presumably it strips html anchor commands -- confirm against its
definition."
  (unhtitle)
  (unanchor)
  (echo-file))

(defun convert-font (mf hf ef)
  "Convert -me font-change segments to the equivalent html.
MF is the troff font name as written after \"\\f\" (e.g. \"B\" or \"(CI\").
HF and EF are the opening and closing tag texts without the outer
angle brackets.  Every \"\\fMF ... \\fP\" segment in the buffer becomes
\"<HF> ... <EF>\".  When a segment has no closing \"\\fP\", the opening tag
is still inserted, a message is issued, and nothing else is changed."
  (interactive "sMe font name: \nsOpening html tag: \nsClosing html tag: ")
  (let ((case-fold-search nil))         ; troff font names are case-sensitive
    (goto-char (point-min))
    (while (search-forward (concat "\\f" mf) nil t)
      (replace-match (concat "<" hf ">") t t)
      (if (search-forward "\\fP" nil t)
          (replace-match (concat "<" ef ">") t t)
        (message "missing \\fP at position %d, line %d"
                 (point) (line-number-at-pos))))))

(defun convert-sections ()
  "Add equivalent html commands around extant .me section commands.
A \".sh N\" line is wrapped in <h(N+1)> ... </h(N+1)>, with the heading
level capped at 6, the deepest level html defines.  A \".uh\" line is
wrapped in <h2> ... </h2>.  The .me lines themselves are kept."
  (interactive)
  (let ((case-fold-search nil))
    (goto-char (point-min))
    (while (re-search-forward "^\\.sh " nil t)
      (let* ((depth (if (looking-at "[ \t]*\\([0-9]+\\)")
                        (string-to-number (match-string 1))
                      0))
             (level (number-to-string (min 6 (1+ depth)))))
        (me2html-wrap-line (concat "\n<h" level ">\n")
                           (concat ".br\n</h" level ">\n"))))
    ; Now do the .uh's as <h2>'s
    (goto-char (point-min))
    (while (re-search-forward "^\\.uh " nil t)
      (me2html-wrap-line "\n<h2>\n" ".br\n</h2>\n"))))

(defun convert-sp-too-simple ()
  "Wrap every \".sp \" line in <pre> ... </pre>, ignoring its argument.
Superseded by `convert-sp'; kept for reference."
  (interactive)
  (let ((case-fold-search nil))
    (goto-char (point-min))
    (while (re-search-forward "^\\.sp " nil t)
      (me2html-wrap-line ".br\n<pre>\n" "</pre>\n"))))

(defun convert-sp ()
  "Convert an -me spacing command of the form \".sp Nv\" to (weaker) html spacing.
N is rounded to the nearest integer, halves rounding up, so a fraction
whose first digit is 5 or more adds one.  A result of zero turns the
line into \".hbr\"; otherwise the line is replaced by a <pre> ... </pre>
block containing one blank line fewer than the rounded count.  The
search starts at point and only the first number on the line is read."
  (interactive)
  (let ((case-fold-search nil))
    (while (re-search-forward "^\\.sp" nil t)
      (let ((count 0))
        ; Only blanks and tabs are skipped, so a missing argument can never
        ; make the parser run on into the following line.
        (skip-chars-forward " \t")
        (unless (eq (following-char) ?.)
          (setq count (get-next-int)))
        (when (eq (following-char) ?.)
          (forward-char 1)
          ; Round on the first fractional digit only: .25 stays down,
          ; .5 and .75 go up.
          (if (and (isdigit (following-char))
                   (>= (following-char) ?5))
              (setq count (1+ count))))
        (delete-region (line-beginning-position)
                       (progn (forward-line 1) (point)))
        (if (> count 0)
            (insert ".br\n<pre>\n"
                    (make-string (1- count) ?\n)
                    ".br\n</pre>\n.br\n")
          (insert ".hbr\n"))))))

(defun get-next-int ()
  "Move over the run of digits at point and return its integer value.
Returns 0 when point is not in front of a digit."
  (interactive)
  (let ((start (point)))
    (move-over-digits)
    (string-to-number (buffer-substring start (point)))))

(defun move-over-digits ()
  "Move forward over any decimal digits in front of point."
  (interactive)
  (skip-chars-forward "0-9"))

(defun isdigit (c)
  "Return non-nil if character C is one of the decimal digits 0 through 9."
  (and (>= c ?0) (<= c ?9)))

(defun move-over-whitespace ()
  "Move forward over any whitespace chars in front of point.
Whitespace is decided by the current syntax table."
  (interactive)
  (skip-syntax-forward " "))

(defun convert-pspic ()
  "Placeholder for picture conversion; currently does nothing."
  nil)

(defun convert-lists ()
  "Convert -me numbered and bulleted lists to (weaker) html format.
Specifically, all numbered lists go to <ol>'s and bulleted lists go to
<ul>'s.  Note that as of June 97, Netscape supports a type parameter in
lists, which we now use here.  Hopefully this or something like it will
become a standard HTML 3 feature.

For \".(L\" the second character after the L becomes the <ol> type, and
anything after it on the line (possibly present 2nd and 3rd args) is
dropped."
  (interactive)
  (dolist (rule '(("^\\.(E.*" . ".br\n<ul>\n.br")
                  ("^\\.)E" . ".br\n</ul>\n.br")
                  ; The trailing .* swallows the extra .(L arguments here,
                  ; so no separate clean-up pass over <ol ...> is needed.
                  ("^\\.(L.\\(.\\).*" . ".br\n<ol type=\\1>\n.br")
                  ; Anchored, so \")L\" in running text is left alone.
                  ("^\\.)L.*" . ".br\n</ol>\n.br")
                  ("^\\.ee" . ".br\n<li>\n.br")
                  ("^\\.le" . ".br\n<li>\n.br")))
    (me2html-replace-all (car rule) (cdr rule))))

(defun convert-centering ()
  "Convert -me centering of the form \".(C ... .)C\" to html.
The block is surrounded with \"<p align=center> ... </p>\"."
  (interactive)
  (me2html-replace-all "^\\.(C" ".br\n<p align=center>\n.br")
  (me2html-replace-all "^\\.)C" ".br\n</p>\n.br"))

(defun convert-html-escape-chars ()
  "Convert non-html usages of <, >, and & to their html character entities.
The search starts at point.  See restrictions on html command
sequences in the doc at the top of me2html.el; the exceptions are
spelled out in `convert-one-html-escape-char'."
  (interactive)
  ; Match the special character alone (not together with the char before
  ; it) so that both characters of \"<<\" and \">>\" get converted.
  (while (re-search-forward "[<>&]" nil t)
    (convert-one-html-escape-char)))

(defun preceding-preceding-char (n)
  "Return the character N+1 positions before point, or 0 if there is none.
With N equal to 1 this is the char in front of `preceding-char'."
  (or (char-before (- (point) n)) 0))

(defun convert-one-html-escape-char ()
  "Convert the <, > or & just before point to its html character entity.
The character is left untouched in these cases:
  - \"<\" as the first char on a line: the line is an embedded html
    command, so point moves to the next line instead;
  - any of the three directly after a backslash (a troff escape);
  - \">\" directly after \"-\" (as in the troff arrow);
  - \"<\" or \">\" directly after \"]\", since ]> is a troff refer macro.
The entity is written with its terminating semicolon."
  (interactive)
  (let ((ch (preceding-char))
        (prev (preceding-preceding-char 1)))
    (cond
     ((not (memq ch '(?< ?> ?&)))
      nil)
     ((and (eq ch ?<) (memq prev '(0 ?\n)))
      (forward-line 1))
     ((eq prev ?\\)
      nil)
     ((and (eq ch ?>) (eq prev ?-))
      nil)
     ((and (memq ch '(?< ?>)) (eq prev ?\]))
      nil)
     (t
      (delete-char -1)
      (insert (cond ((eq ch ?<) "&lt;")
                    ((eq ch ?>) "&gt;")
                    (t "&amp;")))))))

(defun echo-file ()
  "Run this in batch mode to test streaming through emacs.
Emits a marker line, then every buffer line from point to the end of
the buffer, each through `message'."
  (interactive)
  (message "$$$$$$$$$$$$$$$$$$$ REAL BEGINNING $$$$$$$$$$$$$$$$$$$")
  (while (not (eobp))
    (message "%s" (buffer-substring (point) (line-end-position)))
    (forward-line 1)))

(defun unhtitle ()
  "Delete the html title command found after point, if any.
The deleted region runs from the \"<\" of \"<title>\" through the end of the
line holding \"</title>\".  When no \"<title>\" follows point the buffer is
left unchanged."
  (interactive)
  (when (search-forward "<title>" nil t)
    (let ((start (match-beginning 0)))
      (search-forward "</title>" nil t)
      (forward-line 1)
      (delete-region start (point)))))