;
; This file contains conversion functions for translating a text file in troff
; -me format to an equivalent file in html format.  These functions are
; generally not used within emacs.  Rather, their normal usage is via the UNIX
; shell script defined in ~gfisher/bin/me2html, q.v.  (This script is also
; defined as an alias in ~gfisher/.cshrc).  See also the shell script in
; ~gfisher/bin/me2html1, which me2html calls.
;
; Here is the general strategy for how the emacs conversion functions are
; used:
;
;    * An editable base file is maintained in troff -me format, with a .me
;      extension.  A corresponding html file is mechanically generated from the
;      .me file.
;
;    * In order for the conversion to operate correctly, the auxiliary troff
;      macros defined in ~gfisher/nroff/stdhdr.me must be used.  Specifically,
;      the troff file should begin with the following initial command sequence:
;
;		.sz 11
;		.ds ~ /usersgfisher/nroff
;		.so \*~/stdhdr.me
;
;      (This sets the font to 11 point.  For a different font size, change the
;      argument of the .sz command accordingly).
;
;    * While the format of the generated html file is reasonably readable, it
;      is not intended for direct editing.  Rather, editing should be done to
;      the .me file and the conversion rerun.
;
;    * To embed html commands in a .me file, the html commands are guarded with
;      a troff conditional of the form ".if \nh".  By convention, the troff
;      register "h" is used to signify html processing, and hence the -rh
;      command-line switch is given to troff to enable html commands (see
;      below).
;
;    * WARNING: To embed html commands, the "<" of the command sequence MUST
;      be the first char on the line, and the html command must be the only
;      thing on the line.  This restriction exists in order that non-html
;      usages of "<", ">", "&" can be converted to &lt, &gt, and &amp, resp.
;
;    * To create a .html file from a .me file, two passes are necessary.  The
;      first pass uses the me2html function in this file in batch mode.  This
;      pass adds html commands that either replace or augment the equivalent
;      .me commands (see me2html defun code for further details).  The second
;      pass runs nroff on the result of the 1st pass converter, putting the
;      result in a pure html file.  Both passes can be run in a single UNIX
;      command stream, as follows:
;
;          me2html1 $1 | gtbl | groff -rh1 -me -Tascii | col -b > $1.html
;
;      This command stream constitutes the definition of the script in
;      ~gfisher/bin/me2html.  me2html1 (q.v.) is the lower-level CShell script
;      that runs this .el file in emacs batch mode.  Note the "-rh1" arg to
;      groff, which turns on formatting for html (no headers, no hyphenation,
;      etc.).  Note also that me2html1 is a low-level script that is not
;      intended to be used directly at the top-level UNIX shell.
;
;    * As an alternative to explicit insertion of troff conditionals, a base
;      file with an extension .meh can be maintained as the progenitor of pure
;      .me and .html files.  A .meh file contains both -me commands and
;      unguarded html title and anchor commands.
;
;    * To run a .meh file through a normal groff command stream, an additional
;      filter is placed at the very beginning of the command stream.  E.g., with
;      a plain .me file, the following is used to generate PostScript:
;
;          gtbl \!:1 | groff -me > \!:1:r.ps
;
;      whereas to handle a .meh file, the unhtml filter is added as follows
;
;          unhtml \!:1 | gtbl | groff -me > \!:1:r.ps
;
;      In current practice, .meh files are rarely used.  Instead, html title
;      and anchor commands are guarded with explicit troff conditionals.  In
;      particular, there is now a troff macro file called html.me that contains
;      some handy macros that work conditionally depending on if a file is
;      being processed for html or troff.
;
; For a largish example of a .me file with embedded html, see
; ~gfisher/src/rsl/doc/ref-man/ref-man.me.  The companion .html file in the
; same directory was created using me2html.
;
;

(defun dot ()
  "Return the current buffer position of point.
This is simply another name for `point', used throughout this file."
  (point))

(defun me2html ()
  "Convert -me formatting commands to their html equivalents.
This is the first pass of the two-pass conversion described in the
commentary at the top of this file.  html commands are added that either
replace or augment the equivalent -me requests, and the resulting buffer is
written out with `echo-file'.  The steps, in order, are:
  1. escape html-special characters (`convert-html-escape-chars');
  2. rewrite the simple requests (.nf, .fi, displays, .bp, .TS/.TE, .pp,
     .lp) by regexp replacement;
  3. rewrite font changes with `convert-font';
  4. convert spacing, pictures, sections, lists, centering and .hl.
Searching is made case-sensitive by setting `case-fold-search' to nil."
  (interactive)
  (setq case-fold-search nil)
  ; Pass the buffer name as data, not as a format string, so that a "%" in
  ; the name cannot break the message.
  (message "%s" (buffer-name))
  (message "%d" (buffer-size))
  (beginning-of-buffer)
  (convert-html-escape-chars)
  ; In every regexp below the request's leading dot is written "\\." so that
  ; it matches a literal period only.  (Written as "\." the backslash is
  ; dropped by the Lisp reader and the dot matches ANY character, so that
  ; ordinary text lines such as "Unfortunately ..." or "Apple ..." were taken
  ; for .nf and .pp requests.)
  ;
  ; The order of the entries matters: ".(l F" and ".)l F" must be handled
  ; before the plain ".(l" and ".)l".
  ;
  ; NOTE: .na is now global, so no explicit .ad or na commands appear in
  ;        conversions of .(n, (l, and .(t.
  ;
  ; Earlier alternatives for .(t and .)t, kept for reference:
  ;    "^\\.(t" -> "<blockquote><pre><tt>"
  ;    "^\\.)t" -> ".br\n</tt></pre></blockquote>"
  (let ((me2html-requests
         '(("^\\.nf"   . ".br\n<pre>\n.nf")
           ("^\\.fi"   . ".br\n</pre>\n.fi")
           ("^\\.(i"   . ".br\n<blockquote>\n.br")
           ("^\\.)i"   . ".br\n</blockquote>\n.br")
           ("^\\.(l F" . ".br\n<blockquote>\n.br")
           ("^\\.)l F" . ".br\n</blockquote>\n.br")
           ("^\\.(n"   . ".nf\n<blockquote><pre>\n.br")
           ("^\\.)n"   . ".fi\n</pre></blockquote>\n.br")
           ("^\\.(l"   . ".nf\n<blockquote><pre>")
           ("^\\.)l"   . ".fi\n</pre></blockquote>")
           ("^\\.(t"   . ".nf\n<blockquote><pre>\n.br")
           ("^\\.)t"   . ".fi\n</pre></blockquote>\n.br")
           ("^\\.bp"   . ".br\n<pre>\n\n</pre>\n.br")
           ("^\\.TS"   . ".br\n<pre>\n.TS")
           ("^\\.TE"   . ".br\n.TE\n</pre>\n.br")
           ("^\\.pp"   . ".br\n<p>\n.br")
           ("^\\.lp"   . ".br\n<p>\n.br")))
        ; (TROFF-FONT OPEN-TAG CLOSE-TAG).  For the two-tag fonts the closing
        ; tags are emitted innermost first so that the html nests properly.
        (me2html-fonts
         '(("B"   "strong"     "/strong")
           ("I"   "em"         "/em")
           ("C"   "tt"         "/tt")
           ("(CI" "em><tt"     "/tt></em")
           ("(CB" "strong><tt" "/tt></strong")
           ("H"   "tt"         "/tt")
           ("(HI" "em><tt"     "/tt></em")
           ("(HB" "strong><tt" "/tt></strong"))))
    (while me2html-requests
      (beginning-of-buffer)
      (replace-regexp (car (car me2html-requests))
                      (cdr (car me2html-requests)))
      (setq me2html-requests (cdr me2html-requests)))
    (while me2html-fonts
      (beginning-of-buffer)
      (apply 'convert-font (car me2html-fonts))
      (setq me2html-fonts (cdr me2html-fonts))))

  (beginning-of-buffer)
  ; Next line slightly preempts convert-sp.
  (replace-regexp "^\\.sp\n" ".br\n<pre></pre>\n.br\n")
  ; Earlier alternative, kept for reference:
  ;    (replace-string "\n\n" "\n.br\n<pre></pre>\n.br\n")
  (beginning-of-buffer)
  (convert-sp)
  (beginning-of-buffer)
  (convert-pspic)
  (beginning-of-buffer)
  (convert-sections)
  (beginning-of-buffer)
  (convert-lists)
  (beginning-of-buffer)
  (convert-centering)
  (beginning-of-buffer)
  (replace-regexp "^\\.hl" ".br\n<hr>\n.br")
  (beginning-of-buffer)
  (echo-file))

(defun unhtml ()
  "Strip html commands from the buffer and echo what is left.
Runs `unhtitle', then `unanchor' (defined elsewhere in this file; presumably
it removes anchor commands -- confirm there), and finally `echo-file'."
  (unhtitle)
  (unanchor)
  (echo-file))

(defun convert-font (mf hf ef)
  "Convert every -me font-change segment for font MF to the equivalent html.
MF is the troff font name as it is written after the font escape, e.g. \"B\"
or \"(CI\".  HF is the text put between \"<\" and \">\" to open the html
segment and EF the text put between \"<\" and \">\" to close it, e.g.
\"strong\" and \"/strong\".  The whole buffer is processed.

Each font escape for MF is replaced by the opening tag, and the next
return-to-previous-font escape (font name P) after it by the closing tag.
If no such escape follows, the problem is reported with `message' and
`what-line' and the opening tag is left unclosed."
  (interactive "sTroff font name: \nsHtml opening tag: \nsHtml closing tag: ")
  (beginning-of-buffer)
  (while (search-forward (concat "\\f" mf) nil t)
    ; Swap the font escape just found for the opening tag; point ends up
    ; right after the tag.
    (replace-match (concat "<" hf ">") t t)
    (if (search-forward "\\fP" nil t)
        (replace-match (concat "<" ef ">") t t)
      ; Nothing to close.  Do NOT delete or insert anything here: point is
      ; still just after the opening tag, so editing would corrupt that tag.
      (message "missing \\fP at position %d" (point))
      (what-line))))

(defun convert-sections ()
  "Add equivalent html heading commands around extant .me section commands.
For every line starting with \".sh \", the first word after it is read as a
number N; a blank line and \"<hN+1>\" are inserted before the line, and
\".br\" and \"</hN+1>\" after it.  Every line starting with \".uh \" is
bracketed in the same way as an <h2>.  The section lines themselves are
kept.  The whole buffer is processed."
  (interactive)
  (beginning-of-buffer)
  ; The dot is escaped so that only a real .sh request matches (an unescaped
  ; dot also matched text lines such as "Ash ...").
  (while (re-search-forward "^\\.sh " nil t)
    (let* ((level-start (point))
           (level (progn
                    (forward-word 1)
                    (string-to-number
                     (buffer-substring level-start (point)))))
           ; html headings are one level deeper than the -me section depth
           (tag (number-to-string (+ 1 level))))
      (beginning-of-line)
      (insert "\n<h" tag ">\n")
      (forward-line 1)
      ; On a final line without a newline `forward-line' stops at the end
      ; of that line; start a fresh line before closing the heading.
      (unless (bolp) (insert "\n"))
      (insert ".br\n</h" tag ">\n")))
  ; Now do the .uh's as <h2>'s
  (beginning-of-buffer)
  ; A regexp anchored with "^" (rather than a search for "\n.uh ") also finds
  ; a .uh on the first line and one directly after a converted .uh.
  (while (re-search-forward "^\\.uh " nil t)
    (beginning-of-line)
    (insert "\n<h2>\n")
    (forward-line 1)
    (unless (bolp) (insert "\n"))
    (insert ".br\n</h2>\n")))

(defun convert-sp-too-simple ()
  "Wrap every \".sp N\" line in a \"<pre>\" ... \"</pre>\" pair.
A simpler alternative to `convert-sp': each line that follows a newline and
starts with \".sp \" is kept, with \".br\" and \"<pre>\" lines inserted
before it and a \"</pre>\" line after it.  The whole buffer is processed."
  (interactive)
  (beginning-of-buffer)
  (while (search-forward "\n.sp " nil t)
    (beginning-of-line)
    (insert ".br\n<pre>\n")
    ; `forward-line' is the Lisp-level way to reach the start of the next
    ; line; `next-line' is meant for interactive use only.
    (forward-line 1)
    (insert "</pre>\n")))

(defun convert-sp ()
  "Convert -me spacing commands of the form \".sp Nv\" to (weaker) html spacing.
Every line from point onward that starts with \".sp\" is replaced.  N is
rounded to the nearest int L: the integer part is taken, and one is added
when the first digit after the decimal point is 5 or more.  Hence, if
N >= .5, then one space will be thrown.  Any unit suffix is ignored.

If L is 0 the line is replaced by \".hbr\".  Otherwise it is replaced by
\".br\", \"<pre>\", L-1 empty lines, \".br\", \"</pre>\" and \".br\".
Throwing a space in html is done with \"<pre>\\n</pre>\"."
  (interactive)
  ; "^" rather than a leading "\n" in the pattern, so that a .sp line
  ; directly after a converted one (or at point) is found as well.
  (while (re-search-forward "^\\.sp" nil t)
    (let (count digits-start)
      ; Stay on this line: skip blanks and tabs only, never the newline.
      (skip-chars-forward " \t")
      (setq digits-start (point))
      (skip-chars-forward "0-9")
      ; An empty digit string converts to 0.
      (setq count (string-to-number (buffer-substring digits-start (point))))
      ; Round half up by looking at the first fractional digit only, so
      ; that .25 rounds down and .50 rounds up.
      (if (and (eq (following-char) ?.)
               (memq (char-after (1+ (point))) '(?5 ?6 ?7 ?8 ?9)))
          (setq count (1+ count)))
      ; Remove the whole .sp line, including its newline, without touching
      ; the kill ring.
      (beginning-of-line)
      (delete-region (point) (progn (forward-line 1) (point)))
      (if (> count 0)
          (insert ".br\n<pre>\n"
                  (make-string (1- count) ?\n)
                  ".br\n</pre>\n.br\n")
        (insert ".hbr\n")))))

(defun get-next-int ()
  "Read the run of decimal digits that starts at point and return its value.
Point is left just after the last digit read.  When no digit follows point,
point does not move and 0 is returned."
  (interactive)
  (let ((digits-start (point)))
    (skip-chars-forward "0-9")
    (string-to-number (buffer-substring digits-start (point)))))

(defun move-over-digits ()
  "Move point forward over any run of digits 0-9 directly after it.
Point does not move when the next character is not a digit or when point is
at the end of the buffer.  Always returns nil."
  (interactive)
  ; Built-in equivalent of stepping one character at a time.
  (skip-chars-forward "0-9")
  nil)

(defun isdigit (c)
  "Return t if the character code C is one of the digits 0 through 9.
Return nil for every other value of C."
  (interactive)
  (not (or (< c ?0)
           (> c ?9))))

(defun move-over-whitespace ()
  "Move forward over any whitespace chars in front of dot.
A character counts as whitespace when its syntax class in the current
syntax table is whitespace.  Always returns nil."
  (interactive)
  (skip-syntax-forward " ")
  nil)

(defun convert-pspic ()
  "Placeholder conversion step: changes nothing and returns nil.
It is called from `me2html' between the spacing and section conversions."
  nil)

(defun convert-lists ()
  "Convert -me numbered and bulleted lists to (weaker) html format.
Specifically, all numbered lists go to <ol>'s and bulleted lists go to
<ul>'s.  Note that as of June 97, Netscape supports a type parameter in
lists, which we now use here.  Hopefully this or something like it will
become a standard HTML 3 feature.

Working from point onward, the requests are rewritten as follows:
  .(E ...     becomes  <ul>
  .)E         becomes  </ul>
  .(L X ...   becomes  <ol type=X>   (X is the second character after the L)
  .)L ...     becomes  </ol>
  .ee and .le become   <li>
each bracketed by \".br\" requests."
  (interactive)
  ; In every regexp the request's leading dot is escaped ("\\.") so that it
  ; matches a literal period only.  Unescaped, "^.ee" matched text lines
  ; starting with "See" and "^.le" matched lines starting with "Please".
  (replace-regexp "^\\.(E.*" ".br\n<ul>\n.br")
  (beginning-of-buffer)
  (replace-regexp "^\\.)E" ".br\n</ul>\n.br")
  (beginning-of-buffer)
  (replace-regexp "^\\.(L.\\(.\\)" ".br\n<ol type=\\1>\n.br")
  (beginning-of-buffer)
  ;Next line is way hokey, since replace-regexp pass should've done it.
  ;What's going on is that we're getting rid of possibly present 2nd and 3rd
  ;args in a .(L
  (replace-regexp "\\(<ol.*>\\).*" "\\1")
  (beginning-of-buffer)
  ; Anchored at the start of the line like the other requests, so that text
  ; such as "x)L" in the middle of a line is left alone.
  (replace-regexp "^\\.)L.*" ".br\n</ol>\n.br")
  (beginning-of-buffer)
  (replace-regexp "^\\.ee" ".br\n<li>\n.br")
  (beginning-of-buffer)
  (replace-regexp "^\\.le" ".br\n<li>\n.br"))

(defun convert-centering ()
  "Convert -me centering of the form \".(C ... .)C\" to a centered paragraph.
From point onward, each line starting with \".(C\" is replaced by
\"<p align=center>\" and each line starting with \".)C\" by \"</p>\", both
bracketed by \".br\" requests.  (An earlier version surrounded the block
with \"<pre>\" ... \"</pre>\" instead; see the commented-out code below.)"
  (interactive)
;    (replace-regexp "^\\.(C" "<pre>\n.(C")
;    (beginning-of-buffer)
;    (replace-regexp "^\\.)C" ".)C\n</pre>")
;    (beginning-of-buffer)
  ; The leading dot is escaped so that only a literal period matches; an
  ; unescaped dot also matched text lines such as "f(Count)".
  (replace-regexp "^\\.(C" ".br\n<p align=center>\n.br")
  (beginning-of-buffer)
  (replace-regexp "^\\.)C" ".br\n</p>\n.br"))

(defun convert-html-escape-chars ()
  "Convert non-html usages of <, >, and & to &lt, &gt, and &amp, respectively.
Works from point to the end of the buffer.  See the restrictions on html
command sequences in the commentary at the top of this file: a line whose
first character is a less-than sign is taken to be an html command and is
left untouched.  A less-than or greater-than sign directly preceded by a
backslash is left alone as well.  Each remaining candidate is handed to
`convert-one-html-escape-char', with point just after it.

Serious hack in this regard: a closing bracket followed by a greater-than
sign is a troff refer macro, so that sequence (and the one with a less-than
sign) is restored by a second pass over the whole buffer."
  (interactive)
  ; Search for the special character alone and inspect the character before
  ; it here.  (A regexp that also consumes the preceding character misses
  ; the second of two adjacent specials such as "<<" or "&<", and misses a
  ; "<" that starts the line right after a skipped html command line.)
  (while (re-search-forward "[<>&]" nil t)
    (let ((this (preceding-char))
          (prev (char-before (1- (point)))))
      (cond
       ; "<" as the very first character of the buffer also starts an html
       ; command line: skip that line.
       ((and (eq this ?<) (null prev))
        (forward-line 1))
       ; "<" or ">" directly preceded by a backslash is never converted.
       ((and (not (eq this ?&)) (eq prev ?\\))
        nil)
       (t
        (convert-one-html-escape-char)))))
  ; Next two replacements undo the conversion after "]"; excluding "]" in the
  ; search itself could not be made to work.
  (beginning-of-buffer)
  (replace-string "]&gt" "]>")
  (beginning-of-buffer)
  (replace-string "]&lt" "]<"))

(defun preceding-preceding-char (n)
  "Return the character that lies N+1 positions before point.
That is, the character just before the position N characters back from
point; with N = 1 this is the character before the one preceding point.
Return 0 when there is no such character in the accessible part of the
buffer.  Point is not moved."
  ; `char-before' yields nil outside the accessible region; callers compare
  ; the result with `=', so hand back 0 instead, as `preceding-char' does at
  ; the start of the buffer.
  (or (char-before (- (point) n)) 0))

(defun convert-one-html-escape-char ()
  "Escape the html-special character just before point, where appropriate.
The character before point is expected to be a less-than sign, a
greater-than sign or an ampersand:
  - a less-than sign that is the first character on its line starts an
    embedded html command; nothing is changed and point moves to the start
    of the next line;
  - an ampersand preceded by a backslash, and a greater-than sign preceded
    by a hyphen, are left alone;
  - otherwise the character is replaced by &lt, &gt or &amp.
Any other character before point is left unchanged."
  (interactive)
  (let ((this (preceding-char))
        (prev (char-before (1- (point)))))
    (cond
     ((and (eq this ?<) (eq prev ?\n))
      (forward-line 1))
     ; Backslash-ampersand and hyphen-greater-than are not converted.
     ; NOTE(review): the hyphen case is presumably there for troff's arrow
     ; special character; the stricter check for the "\(" in front of it is
     ; disabled:
     ;		(= (preceding-preceding-char 2) ?\()
     ;		(= (preceding-preceding-char 3) ?\\)
     ((or (and (eq this ?&) (eq prev ?\\))
          (and (eq this ?>) (eq prev ?-)))
      nil)
     (t
      (let ((entity (cond ((eq this ?<) "lt")
                          ((eq this ?>) "gt")
                          ((eq this ?&) "amp"))))
        ; Only edit the buffer when there really is something to escape.
        (when entity
          (delete-char -1)
          (insert "&" entity)))))))


(defun echo-file ()
  "Echo the buffer text from point to the end, one `message' call per line.
A marker line containing REAL BEGINNING is emitted first.  Then, for each
line, the text from point to the end of that line is passed to `message';
empty lines are echoed as empty messages.  Point ends up at the end of the
buffer and the buffer is not modified.

Run this in batch mode to test streaming through emacs."
  (interactive)
  (message "$$$$$$$$$$$$$$$$$$$ REAL BEGINNING $$$$$$$$$$$$$$$$$$$")
  (while (not (eobp))
    ; Arguments are evaluated left to right: the start position is taken
    ; before point is moved to the end of the line.
    (message "%s" (buffer-substring (point)
                                    (progn (end-of-line) (point))))
    (forward-line 1)))

(defun unhtitle ()
  "Delete the first html title command found after point.
The text from the start of \"<title>\" through the end of the line that
holds the matching \"</title>\" (including its newline) is removed.  If
either tag cannot be found, the buffer is left unchanged."
  (interactive)
  (when (search-forward "<title>" nil t)
    (let ((title-start (match-beginning 0)))
      (when (search-forward "</title>" nil t)
        ; Take the rest of the closing line along with the title.
        (forward-line 1)
        (delete-region title-start (point))))))