;
; This file contains conversion functions for translating a text file in troff
; -me format to an equivalent file in html format.  These functions are
; generally not used within emacs.  Rather, their normal usage is via the UNIX
; shell script defined in ~gfisher/bin/me2html, q.v.  (This script is also
; defined as an alias in ~gfisher/.cshrc).  See also the shell script in
; ~gfisher/bin/me2html1, which me2html calls.
;
; Here is the general strategy for how the emacs conversion functions are
; used:
;
;    * An editable base file is maintained in troff -me format, with a .me
;      extension.  A corresponding html file is mechanically generated from
;      the .me file.
;
;    * In order for the conversion to operate correctly, the auxiliary troff
;      macros defined in ~gfisher/nroff/stdhdr.me must be used.  Specifically,
;      the troff file should begin with the following initial command
;      sequence:
;
;          .sz 11
;          .ds ~ /usersgfisher/nroff
;          .so \*~/stdhdr.me
;
;      (This sets the font to 11 point.  For a different font size, change the
;      argument of the .sz command accordingly).
;
;    * While the format of the generated html file is reasonably readable, it
;      is not intended for direct editing.  Rather, editing should be done to
;      the .me file and the conversion rerun.
;
;    * To embed html commands in a .me file, the html commands are guarded
;      with a troff conditional of the form ".if \nh".  By convention, the
;      troff register "h" is used to signify html processing, and hence the
;      -rh command-line switch is given to troff to enable html commands (see
;      below).
;
;    * WARNING: To embed html commands, the "<" of the command sequence MUST
;      be the first char on the line, and the html command must be the only
;      thing on the line.  This restriction exists in order that non-html
;      usages of "<", ">", "&" can be converted to &lt;, &gt;, and &amp;,
;      resp.
;
;    * To create a .html file from a .me file, two passes are necessary.  The
;      first pass uses the me2html function in this file in batch mode.  This
;      pass adds html commands that either replace or augment the equivalent
;      .me commands (see me2html defun code for further details).  The second
;      pass runs nroff on the result of the 1st pass converter, putting the
;      result in a pure html file.  Both passes can be run in a single UNIX
;      command stream, as follows:
;
;          me2html1 $1 | gtbl | groff -rh1 -me -Tascii | col -b > $1.html
;
;      This command stream constitutes the definition of the script in
;      ~gfisher/bin/me2html.  me2html1 (q.v.) is the lower-level CShell script
;      that runs this .el file in emacs batch mode.  Note the "-rh1" arg to
;      groff, which turns on formatting for html (no headers, no hyphenation,
;      etc.).  Note also that me2html1 is a low-level script that is not
;      intended to be used directly at the top-level UNIX shell.
;
;    * As an alternative to explicit insertion of troff conditionals, a base
;      file with an extension .meh can be maintained as the progenitor of pure
;      .me and .html files.  A .meh file contains both -me commands and
;      unguarded html title and anchor commands.
;
;    * To run a .meh file through a normal groff command stream, an additional
;      filter is placed at the very beginning of the command stream.  E.g.,
;      with a plain .me file, the following is used to generate PostScript:
;
;          gtbl \!:1 | groff -me > \!:1:r.ps
;
;      whereas to handle a .meh file, the unhtml filter is added as follows
;
;          unhtml \!:1 | gtbl | groff -me > \!:1:r.ps
;
;      In current practice, .meh files are rarely used.  Instead, html title
;      and anchor commands are guarded with explicit troff conditionals.  In
;      particular, there is now a troff macro file called html.me that
;      contains some handy macros that work conditionally depending on if a
;      file is being processed for html or troff.
;
; For a largish example of a .me file with embedded html, see
; ~gfisher/src/rsl/doc/ref-man/ref-man.me.  The companion .html file in the
; same directory was created using me2html.
;
;

(defun dot ()
  "Return the value of point.
Retained for any caller that still uses the old name for `point'."
  (point))

;;; Internal helpers.

(defun me2html--replace-all (from to &optional regexp)
  "Replace every occurrence of FROM with TO in the accessible buffer.
The scan always starts at `point-min' and is case-sensitive.  FROM is a
literal string unless REGEXP is non-nil, in which case FROM is a regular
expression and TO may contain back-references such as \\1.
Unlike `replace-string', this prints no \"Replaced N occurrences\"
message."
  (let ((case-fold-search nil))
    (goto-char (point-min))
    (while (if regexp
               (re-search-forward from nil t)
             (search-forward from nil t))
      ;; FIXEDCASE is always t; LITERAL is t only for plain strings.
      (replace-match to t (not regexp)))))

(defun me2html--replace-pairs (pairs)
  "Apply `me2html--replace-all' to each (FROM . TO) cons in PAIRS, in order."
  (dolist (pair pairs)
    (me2html--replace-all (car pair) (cdr pair))))

(defun me2html--delete-line ()
  "Delete from point through the end of the current line, newline included.
Every caller has point at the beginning of a line."
  (delete-region (point) (progn (forward-line 1) (point))))

(defun me2html--preceded-by-p (str)
  "Return non-nil if the text immediately before point equals STR."
  (let ((start (- (point) (length str))))
    (and (>= start (point-min))
         (string= (buffer-substring start (point)) str))))

(defconst me2html--font-table
  '(("B"      "strong"     "/strong")
    ("[HB]"   "strong"     "/strong")
    ("I"      "em"         "/em")
    ("(BI"    "strong><em" "/em></strong")
    ("[BI]"   "strong><em" "/em></strong")
    ("C"      "tt"         "/tt")
    ("(CI"    "em><tt"     "/tt></em")
    ("[CI]"   "em><tt"     "/tt></em")
    ("(CB"    "strong><tt" "/tt></strong")
    ("[CB]"   "strong><tt" "/tt></strong")
    ("[CBI]"  "strong><em" "/em></strong")
    ;; Styles are now used to get helvetica, so (HI and (HB are not mapped.
    ("H"      "tt"         "/tt")
    ;; Hack to make troff use of bookman transparent to html.
    ;; NOTE(review): the closing tags for [BMB] and [BMI] used to be
    ;; "/strong></em" and "/em></em", which do not match the opening tags;
    ;; they now mirror the (CB and (CI entries.  Confirm this is the intent.
    ("[BMB]"  "strong><tt" "/tt></strong")
    ("[BMI]"  "em><tt"     "/tt></em")
    ("[BMBI]" "strong><em" "/em></strong")
    ;; The fancy troff italic font.
    ("[ZCMI]" "em"         "/em"))
  "Font conversions applied by `me2html', in order.
Each element is (MF HF EF), the argument list for `convert-font'.")

;;; Top-level driver.

(defun me2html ()
  "Convert -me formatting commands to their html equivalents.
Rewrites the current buffer in place: html commands are added around or
in place of the corresponding -me commands, and the result is then
written out line by line with `echo-file'.  The passes run in a fixed
order; more specific commands (e.g. \".(l F\", \".(t 0\") are replaced
before their shorter prefixes."
  (interactive)
  (setq case-fold-search nil)
  ;; "%s" so that a `%' in the buffer name is not taken as a format spec.
  (message "%s" (buffer-name))
  (message "%d" (buffer-size))
  (goto-char (point-min))
  (insert "<html\\>\n. br\n")
  (goto-char (point-min))
  (convert-html-escape-chars)
  ;; Do the .br replacement first since numerous .br's are added below.
  (me2html--replace-pairs
   '(("\n.br\n"  . "\n.br\n<br>\n.br\n")
     ("\n.nf"    . "\n.br\n<pre>\n.nf")
     ("\n.fi\n"  . "\n.br\n</pre>\n.fi\n")
     ("\n.(i"    . "\n.br\n<blockquote>\n.br")
     ("\n.)i"    . "\n.br\n</blockquote>\n.br")
     ("\n.(l F"  . "\n.br\n<blockquote>\n.br")
     ("\n.)l F"  . "\n.br\n</blockquote>\n.br")))
  ;; NOTE: .na is now global, so no explicit .ad or .na commands appear in
  ;; the conversions of .(n, .(l, and .(t.  The .(n ... .)n conversion
  ;; inserts <br>'s between each line.
  (goto-char (point-min))
  (convert-no-fill-block)
  (me2html--replace-pairs
   '(("\n.(l"    . "\n.nf\n<blockquote><pre>")
     ("\n.)l"    . "\n.fi\n</pre></blockquote>")
     ("\n.(t 0"  . "\n.nf\n<pre>\n.br")
     ;; NOTE(review): this closer used to emit ".nf" where every other
     ;; closer emits ".fi"; changed to ".fi" on that basis.
     ("\n.)t 0"  . "\n.fi\n</pre>\n.br")
     ("\n.(ts"   . "\n.nf\n<blockquote><pre>\n.br")
     ("\n.)ts"   . "\n.fi\n</pre></blockquote>\n.br")
     ("\n.(t"    . "\n.nf\n<blockquote><pre>\n.br")
     ("\n.)t"    . "\n.fi\n</pre></blockquote>\n.br")
     ("\n.bp"    . "")
     ("\n.pp"    . "\n.br\n<p>\n.br")
     ;; .pep and .pe are used instead of guessing when a <p> needs a
     ;; preceding </p>.
     ("\n.pep"   . "\n.br\n</p><p>\n.br")
     ("\n.pe"    . "\n.br\n</p>\n.br")
     ("\n.lp"    . "\n.br\n<p>\n.br")))
  ;; Converting .sz directly didn't work.  See the .(S ... .)S macro pair
  ;; in html.me.
  (me2html--replace-all "^\\.(S \\(.*\\)" ".br\n<font size=\\1>\n.br" t)
  (me2html--replace-all "^\\.)S" ".br\n</font>\n.br" t)
  (goto-char (point-min))
  (convert-embedded-font-change)
  (me2html--replace-all "\n.hl" "\n.br\n<hr>\n.br")
  (dolist (font me2html--font-table)
    (goto-char (point-min))
    (apply 'convert-font font))
  ;; The next replacement slightly preempts convert-sp
  ;; (21nov99: <p> instead of <pre></pre>).
  (me2html--replace-all "\n.sp\n" "\n.br\n<p>\n.br\n")
  (dolist (pass '(convert-sp
                  convert-pspic
                  convert-sections
                  convert-lists
                  convert-centering
                  convert-footnotes
                  convert-colors
                  convert-subscripts-and-superscripts
                  convert-special-chars
                  convert-tbl))
    (goto-char (point-min))
    (funcall pass))
  ;; Make sure that any .ad's used for troff purposes are not interpreted
  ;; when formatting for html, since that would disable the global .na.
  ;; Among other things, a global ".ad" causes long hrefs to be adjusted,
  ;; which can insert extra spaces and break them.  Writing ". ad" instead
  ;; of ".ad" does not help at this stage.  This is done last in case any
  ;; preceding conversion inserted .ad's (the tbl conversion is one such).
  (me2html--replace-all "\n.ad" "\n.if !\\\\nh .ad")
  (goto-char (point-max))
  (insert ". br\n</body>\n. br\n</html>")
  (goto-char (point-min))
  (echo-file))

(defun insert-html-heading-if-necessary ()
  "Insert a default html heading when no .(TI command is found.
Searches forward from point for a line starting with \".(TI\".  If there
is none, a <head> ... </head> pair containing the buffer name minus its
last three characters (the assumed \".me\" extension) is inserted on the
line after \"<html\", followed by a <body> tag with a white background.
Not currently called from `me2html'."
  (if (not (re-search-forward "^\\.(TI" nil t))
      (progn
        (goto-char (point-min))
        (search-forward "<html")
        (forward-line 1)
        (insert "<head>\n"
                (substring (buffer-name) 0 -3)
                "</head>\n. br\n<body bgcolor=white>\n. br\n"))))

(defun unhtml ()
  "Strip the html title and anchor commands, then echo the buffer.
NOTE(review): `unanchor' is not defined in this part of the file; it is
presumably provided elsewhere."
  (unhtitle)
  (unanchor)
  (echo-file))

(defun convert-font (mf hf ef)
  "Convert each -me font-change segment to the equivalent html.
MF is the troff font spec that follows \"\\f\", HF the html tag text that
opens the segment and EF the tag text that closes it (both without the
surrounding angle brackets).  Each \"\\fMF\" found after point is replaced
by \"<HF>\" and the next \"\\fP\" by \"<EF>\".  A missing \"\\fP\" is
reported with `message'."
  (interactive
   "sme font spec (what follows \\f): \nshtml start font command: \nshtml end font command: ")
  (let ((case-fold-search nil))
    (while (search-forward (concat "\\f" mf) nil t)
      (replace-match (concat "<" hf ">") t t)
      (if (search-forward "\\fP" nil t)
          (replace-match (concat "<" ef ">") t t)
        (message "missing \\fP at position %d" (point))))))

(defvar default-uh-size 3
  "Html heading level used for a .uh command that has no depth argument.")

(defun convert-sections ()
  "Add equivalent html commands around extant .me section commands.
Handles .sh (via `convert-sh'), .uh and .ap lines.  Chapter-level
commands, including .Ap, are deliberately left alone: chapters need
neither html header commands nor name anchors, on the assumption that
each chapter lives in its own file."
  (interactive)
  (let ((case-fold-search nil))
    (goto-char (point-min))
    (while (re-search-forward "^\\.sh " nil t)
      (convert-sh))
    ;; Now do the .uh's as <hN>'s, using the trailing depth argument when
    ;; the line ends in digits and `default-uh-size' otherwise.
    (goto-char (point-min))
    (while (re-search-forward "^\\.uh " nil t)
      (end-of-line)
      (let ((level (if (isdigit (preceding-char))
                       (let ((end (point)))
                         (skip-chars-backward "0-9")
                         (1+ (string-to-number
                              (buffer-substring (point) end))))
                     default-uh-size)))
        (beginning-of-line)
        (insert ".br\n<h" (number-to-string level) ">\n")
        (forward-line 1)
        (insert ".br\n</h" (number-to-string level) ">\n")))
    ;; Lastly, do the .ap's (section-level appendices) like ".sh 1".
    (goto-char (point-min))
    (while (re-search-forward "^\\.ap" nil t)
      (convert-sh-title 1))))

(defun convert-sh ()
  "Work-doing helper for `convert-sections', q.v.
With point just after \".sh \", read the section depth that follows,
turn it into an html heading level (depth + 1, at most 4) and hand that
to `convert-sh-title'."
  (interactive)
  (let ((start (point)))
    (forward-word 1)
    (convert-sh-title
     (number-to-string
      (min 4 (1+ (string-to-number (buffer-substring start (point)))))))))

(defun convert-sh-title (n)
  "Another work-doing helper for `convert-sections', q.v.
N is the html heading level, as a string or a number.  The next
double-quoted string after point is taken as the section title; an
<a name=...> anchor and an <hN> ... </hN> pair are then placed around
the troff section command line.  Signals an error if no quoted title
follows point.  For level 1 sections kept in separate files the name
anchor is superfluous but harmless."
  (let ((level (format "%s" n))
        title-start
        title)
    (search-forward "\"")
    (setq title-start (point))
    (search-forward "\"")
    ;; TITLE deliberately keeps the closing quote; it terminates the
    ;; name="..." attribute below.
    (setq title (buffer-substring title-start (point)))
    (beginning-of-line)
    ;; Note the .ll increment/decrement around the <a name= ...> command.
    ;; Without it, long section names are split onto two lines, which
    ;; makes the href invalid.
    (insert ".br\n.ll 1000\n<a name=\"" title ">\n.ll 79\n")
    (insert ".br\n<h" level ">\n")
    (forward-line 1)
    (insert ".br\n</h" level ">\n")
    (insert ".br\n</a>\n")))

(defun convert-sp-too-simple ()
  "Wrap each line that follows a \".sp \" command in <pre> ... </pre>.
Older, simpler alternative to `convert-sp'; `me2html' does not call it."
  (interactive)
  (goto-char (point-min))
  (while (search-forward "\n.sp " nil t)
    (beginning-of-line)
    (insert ".br\n<pre>\n")
    (forward-line 1)
    (insert "</pre>\n")))

(defun convert-sp ()
  "Convert -me spacing commands of the form \".sp Nv\" to html spacing.
N is rounded to the nearest integer (a fraction of .5 or more rounds up)
and the command line is replaced by N + 1 <br> groups, so even \".sp 0\"
yields one break.  Lines such as \".spt\", where a lowercase letter
follows \".sp\", are taken to be other macros and left alone.

Converting two plain empty lines into a forced html throw was tried and
dropped because it misbehaves inside no-fill text; use \".sp 2\"."
  (interactive)
  (let ((case-fold-search nil))
    ;; Anchoring with ^ (rather than searching for "\n.sp") also catches
    ;; a .sp line that directly follows one just converted.
    (while (re-search-forward "^\\.sp" nil t)
      (let ((next (following-char)))
        (when (or (< next ?a) (> next ?z))
          (let ((count 0))
            ;; Only blanks and tabs: newline also has whitespace syntax,
            ;; and skipping it would read the number from the next line.
            (skip-chars-forward " \t")
            (if (/= (following-char) ?.)
                (setq count (get-next-int)))
            (when (= (following-char) ?.)
              (forward-char 1)
              ;; Round on the first fractional digit only, so that .05
              ;; or .25 does not round up.
              (if (and (isdigit (following-char))
                       (>= (following-char) ?5))
                  (setq count (1+ count))))
            (beginning-of-line)
            (me2html--delete-line)
            (dotimes (_i (1+ count))
              (insert ".br\n<br>\n.br\n"))))))))

(defun get-next-int ()
  "Move over the digits at point and return them as a number.
Returns 0 when no digit follows point."
  (interactive)
  (let ((start (point)))
    (move-over-digits)
    (string-to-number (buffer-substring start (point)))))

(defun move-over-digits ()
  "Move point forward over any run of decimal digits."
  (interactive)
  (skip-chars-forward "0-9"))

(defun isdigit (c)
  "Return non-nil if character C is a decimal digit."
  (and (>= c ?0) (<= c ?9)))

(defun move-over-whitespace ()
  "Move forward over any whitespace chars in front of point.
Whitespace is decided by the buffer's syntax table, so this may cross
newlines."
  (interactive)
  (skip-syntax-forward " "))

(defun convert-pspic ()
  "Placeholder for picture conversion; currently does nothing.")

(defun convert-lists ()
  "Convert -me numbered and bulleted lists to (weaker) html format.
Bulleted .(E ... .)E lists become <ul>'s and numbered .(L ... .)L lists
become <ol>'s, with the first .(L argument passed through as the html
type parameter.  The type=disc that some browsers need on coloured
bullets cannot be handled here, so it is done in `convert-bullet'."
  (interactive)
  (me2html--replace-all "\n\\.(E.*" "\n.br\n<ul>\n.br" t)
  ;; 15feb06: neither </font> nor <font color=black> is appended after
  ;; </ul> any more; the latter made Safari restart <ol> numbering at 1
  ;; after nested <ul> items.
  (me2html--replace-all "\n.)E" "\n.br\n</ul>\n.br")
  (me2html--replace-all "\\.(L.\\(.\\)" ".br\n<ol type=\\1>\n.br" t)
  ;; Get rid of a possibly present 2nd and 3rd argument of a .(L.
  (me2html--replace-all "\\(<ol.*>\\).*" "\\1" t)
  (me2html--replace-all "^\\.)L.*" ".br\n</ol>\n.br" t)
  (goto-char (point-min))
  (convert-bullet)
  (me2html--replace-all "\n\n.le" "\n.br\n<br><br>\n.le")
  (me2html--replace-all "\n.le" "\n.br\n<p>\n<li>\n.br"))

(defun convert-bullet ()
  "Convert a possibly colored bullet item.
With no argument, .ee goes straight to <li>.  With a color argument,
\".ee COLOR\" goes to
    <font color=COLOR><li type=disc><font color=COLOR><font color=black>

History of the colored form:
  6may03   Safari evidently requires the format above; other browsers
           accept it.
  13jun02  Pre-version-5 IE mishandles <font color=x><li></font>, so
           that form cannot be used.
  12jun02  Recent IE versions would accept the </font> form.
The explicit color=black (rather than /font) and the type=disc are both
IE workarounds; without type=disc, second and subsequent lists show
circles instead of bullets.  A side effect is that the text of enum
items is forced to black."
  (interactive)
  (me2html--replace-all
   "^\\.ee \\(.*\\)"
   ".sp\n<font color=\\1><li type=disc><font color=\\1><font color=black>\n.br"
   t)
  (me2html--replace-all "^\\.ee" ".br\n<li>\n.br" t))

(defun convert-centering ()
  "Convert -me centering of the form \".(C ... .)C\".
The block is surrounded with <p align=center> ... </p>."
  (interactive)
  (me2html--replace-all "\n.(C\n" "\n.br\n<p align=center>\n.br\n")
  (me2html--replace-all "\n.)C\n" "\n.br\n</p>\n.br\n"))

(defun convert-footnotes ()
  "Surround the \\** footnote ref commands with html superscript commands.
A reference in running text also gets an <a href> to its footnote; the
\\** that opens a footnote body (the line after \".(fn\") gets only the
superscript.  If any reference was found, a \"Footnotes:\" heading and a
.FT command, which ejects the footnote diversion, are appended to the
buffer.

NOTE: -me footnotes of the form \".(f ... .)f\" are no longer converted,
since that interfered with the refer macros.  Use .(fn ... .)fn instead;
see the definitions of those macros in html.me."
  (interactive)
  (let ((count 0)
        in-footnote)
    (while (re-search-forward "\\\\\\*\\*" nil t)
      (setq count (1+ count))
      ;; Is the previous line exactly ".(fn"?
      (setq in-footnote
            (save-excursion
              (forward-line -1)
              (string= (buffer-substring (point) (line-end-position))
                       ".(fn")))
      (replace-match
       (concat (if (not in-footnote) " <a href=\"#footnote\\n(ft\">")
               "<sup>\\n(ft</sup>"
               (if (not in-footnote) "</a>"))
       t t))
    (goto-char (point-max))
    (if (> count 0)
        (insert
         "\n<pre>\n\n</pre>\n<h3>\n.br\nFootnotes:\n.br\n</h3>\n.br\n.FT\n"))))

(defun convert-html-escape-chars ()
  "Convert non-html usages of < and > to &lt; and &gt;, respectively.
See the restrictions on html command sequences in the documentation at
the top of this file.  Exceptions:
  - \"]>\" and \"]<\" are left alone, since they are troff refer macros;
  - \"*<\" and \"*>\" are left for
    `convert-subscripts-and-superscripts';
  - nothing on a troff command line (one starting with \".\") is
    converted;
  - 5jan99: nothing inside a .(H ... .)H sequence is converted.
12jan98: & is no longer searched for, since browsers interpret a bare &
directly."
  (interactive)
  (let ((case-fold-search nil))
    (while (re-search-forward "[^\\*]<\\|[^\\*]>\\|\\.(H" nil t)
      (if (string= (match-string 0) ".(H")
          (progn
            (goto-char (match-beginning 0))
            (if (bolp)
                ;; A real .(H block: skip to its end.
                (search-forward "\n.)H")
              (forward-char 3)))
        ;; Not in a .(H block.  Check that we are not on a troff command
        ;; line before converting.
        (let ((after-match (point)))
          (beginning-of-line)
          (if (= (following-char) ?.)
              (forward-line 1)
            (goto-char after-match)
            (convert-one-html-escape-char)))))
    ;; Undo the conversion for the refer macros; excluding "]" in the
    ;; search regexp itself could not be made to work.
    (me2html--replace-all "]&gt;" "]>")
    (me2html--replace-all "]&lt;" "]<")))

(defun preceding-preceding-char (n)
  "Return the character located N + 1 positions before point."
  (save-excursion
    (backward-char n)
    (preceding-char)))

(defun convert-one-html-escape-char ()
  "Replace the <, > or & just before point with its html entity.
Nothing is replaced when the character is a \"<\" at the beginning of a
line (an html command; the rest of that line is skipped), an \"&\"
preceded by a backslash, or a \">\" preceded by \"-\"."
  (interactive)
  (let ((c (preceding-char))
        (before (or (char-before (1- (point))) 0)))
    (cond
     ((and (= c ?<) (= before ?\n))
      (forward-line 1))
     ((and (= c ?&) (= before ?\\)) nil)
     ((and (= c ?>) (= before ?-)) nil)
     (t
      (let ((entity (cond ((= c ?<) "lt;")
                          ((= c ?>) "gt;")
                          ((= c ?&) "amp;"))))
        ;; Any other character is left untouched.
        (when entity
          (delete-char -1)
          (insert "&" entity)))))))

(defun echo-file ()
  "Write the buffer from point to the end, one `message' per line.
Run this in batch mode to stream the buffer through emacs.  A marker
line is printed first."
  (interactive)
  (message "$$$$$$$$$$$$$$$$$$$ REAL BEGINNING $$$$$$$$$$$$$$$$$$$")
  (while (not (eobp))
    (message "%s" (buffer-substring (point) (line-end-position)))
    (forward-line 1)))

(defun unhtitle ()
  "Delete the first <title> ... </title> element found after point.
The deletion runs from the \"<\" of <title> through the end of the line
holding </title>.  Does nothing when there is no <title>."
  (interactive)
  (when (search-forward "<title>" nil t)
    (search-backward "<")
    (let ((start (point)))
      (search-forward "</title>" nil t)
      (forward-line 1)
      (delete-region start (point)))))

(defun convert-tbl ()
  "Convert each troff tbl-style table after point to the equivalent html."
  (interactive)
  (let ((case-fold-search nil))
    (while (re-search-forward "^\\.TS" nil t)
      (beginning-of-line)
      (me2html--delete-line)
      (insert ".nf\n.na\n<table align=center border=1 cellpadding=4>\n")
      (convert-tbl-body (convert-tbl-heading)))))

(defun convert-tbl-heading ()
  "Delete the tbl format section, up to the line that ends with \".\".
The whole heading is currently discarded, so extra heading rows must not
be used for things like bold fonts; do that directly in the data lines.

The return value is meant to become a list of per-column html alignment
strings derived from the first format row.  That is not implemented yet,
so nil is always returned."
  (interactive)
  (let ((start (point))
        (column-justifications nil))
    (re-search-forward "\\.$")
    (forward-line 1)
    (delete-region start (point))
    column-justifications))

(defun convert-tbl-body (column-justifications)
  "Convert the tbl body that starts at point and ends at the next .TE.
A <tr><td> is put in front of each row and each tab becomes a new <td>.
Rows are the lines that are not inside a T{ ... T} block and do not
start with \".\" (a troff command to be passed through) or \"<tr\".
Leading blanks in a row become &nbsp;'s, on the assumption that they are
wanted for indentation.  Afterwards the T{ and T} markers are removed
and unpaddable spaces (backslash-space) become &nbsp; except on lines
starting with \".(A\".

COLUMN-JUSTIFICATIONS is accepted but not yet used.

Precondition: the first line of the table body is not a T{."
  ;; For now the <td> arguments are unconditionally "valign=top".
  (let ((td-args "valign=top")
        (case-fold-search nil)
        (body-start (point)))
    (re-search-forward "^\\.TE")
    (beginning-of-line)
    (me2html--delete-line)
    (insert ". br\n</table>\n.fi\n")
    (search-backward "</table>")
    (save-restriction
      (narrow-to-region body-start (point))
      ;; Get rid of "=" on a single line.
      (me2html--replace-all "^=\n" "" t)
      ;; Nuke all lines with just a "_".  These force line separators in
      ;; troff, but are not wanted in html.
      (me2html--replace-all "^_\n" "" t)
      (goto-char (point-min))
      (while (not (eobp))
        ;; At the start of a data row: open the row and its first cell.
        (when (and (bolp)
                   (/= (following-char) ?.)
                   (not (looking-at "<tr")))
          (insert " <tr>\n <td " td-args ">")
          (while (looking-at " ")
            (delete-char 1)
            (insert "&nbsp;"))
          (end-of-line))
        (cond
         ;; A .T& block is simply removed for now.
         ((and (bolp) (looking-at "\\.T&"))
          (convert-tbl-t-ampersand-info))
         ;; Not at the beginning of a line here means we are at the end
         ;; of a row.  If that row ends with "T{", skip to the end of the
         ;; line holding the matching "T}"; that line may itself end with
         ;; another "T{", which the next iteration handles.
         ((me2html--preceded-by-p "T{")
          (re-search-forward "^T}")
          (end-of-line))
         (t
          (forward-line 1))))
      ;; Replace each TAB with a "<td>".
      (me2html--replace-all "\t" (concat "\n <td " td-args ">"))
      ;; Nuke "T{" and "T}".
      (me2html--replace-all "T{" "")
      (me2html--replace-all "T}\n" "")
      ;; Replace any remaining "\ " (unpaddable spaces) with "&nbsp;",
      ;; except in a line starting with .(A.  Doing this throughout the
      ;; file was considered too dangerous, given how they are used in
      ;; toc files and the like.
      (goto-char (point-min))
      (while (search-forward "\\ " nil t)
        (beginning-of-line)
        (if (looking-at "\\.(A")
            (forward-line 1)
          (let ((eol (copy-marker (line-end-position))))
            (while (search-forward "\\ " eol t)
              (replace-match "&nbsp;" t t))
            (set-marker eol nil)))))))

(defun convert-tbl-t-ampersand-info ()
  "Delete the tbl .T& block that starts at point and return (0).
The block runs from the .T& line through the first following line that
ends with a period; if no such line exists, only the .T& line is
removed.

The returned list is meant to hold an html colspan value, computed by
counting the \"s\" entries on the line after the .T&.  That logic never
worked properly, so the count is always 0."
  (interactive)
  (let ((start (point)))
    (re-search-forward "[.]$" nil t)
    (forward-line 1)
    (delete-region start (point))
    (list 0)))

(defun convert-colors ()
  "Convert troff color strings such as \\*[red] to html <font> commands.
\\*[black] becomes </font>."
  (me2html--replace-pairs
   '(("\\*[red]"     . "<font color=red>")
     ("\\*[green]"   . "<font color=green>")
     ("\\*[blue]"    . "<font color=blue>")
     ("\\*[black]"   . "</font>")
     ("\\*[yellow]"  . "<font color=yellow>")
     ("\\*[orange]"  . "<font color=orange>")
     ("\\*[purple]"  . "<font color=purple>")
     ("\\*[darkred]" . "<font color=darkred>"))))

(defun convert-subscripts-and-superscripts ()
  "Convert the -me sub- and superscript strings to <sub> and <sup>.
The angle brackets of the inserted tags are written with a preceding
backslash."
  (me2html--replace-pairs
   '(("\\*<" . "\\<sub\\>")
     ("\\*>" . "\\</sub\\>")
     ("\\*{" . "\\<sup\\>")
     ("\\*}" . "\\</sup\\>"))))

(defun convert-special-chars ()
  "Convert troff Greek letters and other special chars to html entities.
See http://www.w3.org/TR/WD-entities-961125 for defs.  Entities for
\\(sp and \\(da are not emitted because they did not work in Netscape
4.6 and thereabouts."
  (me2html--replace-pairs
   '(("\\(sc" . "&sect;")
     ;; NOTE(review): \(-> maps to the left-right arrow entity; confirm
     ;; that &rarr; was not intended.
     ("\\(->" . "&harr;")
     ("\\(*l" . "&lambda;"))))

(defun convert-embedded-font-change ()
  "Convert paired in-line troff size changes to <font size=N> ... </font>.
Size changes are taken in pairs: the first of each pair supplies the
html size (sign included) and the second becomes </font>.  An unpaired
size change is reported with `message' and conversion stops."
  (interactive)
  (let ((case-fold-search nil)
        (unpaired nil))
    (while (and (not unpaired)
                (re-search-forward "\\\\s\\(.[1-9]+\\)" nil t))
      (replace-match (concat "<font size=" (match-string 1) ">") t t)
      (if (re-search-forward "\\\\s.[1-9]+" nil t)
          (replace-match "</font>" t t)
        (setq unpaired t)
        (message "missing closing \\s size change after position %d"
                 (point))))))

(defun convert-no-fill-block ()
  "Convert .(n ... .)n blocks to <blockquote>'s with a <br> per line.
The buffer is switched to `nroff-mode' so that `forward-sexp' can find
the \".)n\" that matches each \".(n\".  Within a block, no <br> is put
before the first line, before a .sp command, or before the line that
follows a .sp or a nested .(n / .)n."
  (interactive)
  (nroff-mode)
  ;; Changing the major mode resets buffer-local variables.
  (setq case-fold-search nil)
  (while (re-search-forward "^\\.(n" nil t)
    (beginning-of-line)
    (let ((block-start (point)))
      ;; Jump from the "(" of ".(n" to the matching ")" of ".)n".
      (forward-char 1)
      (forward-sexp)
      (beginning-of-line)
      (me2html--delete-line)
      (insert ".br\n</blockquote>\n.br\n")
      (forward-line -3)
      (save-restriction
        (narrow-to-region block-start (point))
        (goto-char (point-min))
        (me2html--delete-line)
        (insert ".br\n<blockquote>\n.br\n")
        (forward-line 1)
        (while (not (eobp))
          (cond
           ((looking-at "\\.(n")
            (me2html--delete-line)
            (insert ".br\n<blockquote>\n.br\n"))
           ((looking-at "\\.)n")
            (me2html--delete-line)
            (insert ".br\n</blockquote>\n.br\n"))
           ((looking-at "\\.sp")
            (forward-line 1))
           (t
            (insert ".br\n<br>\n.br\n")))
          (forward-line 1))))))