meso-web

Sources of the |Méso|Star> website
git clone git://git.meso-star.fr/meso-web.git
Log | Files | Refs | README | LICENSE

commit f7f0846dfdf7d8f8e4860db7e313df4818e36daf
parent aa3b7008c448d75d3265bb7735d33b88a0e64085
Author: Vincent Forest <vincent.forest@meso-star.com>
Date:   Wed,  8 Feb 2023 16:44:57 +0100

Fix the convert_man.sh script

Man pages generated by the scdoc program are not converted correctly by
man2html. This commit adds pre- and post-processing steps to work around
these problems. In addition, the post-processing performed by tidy is moved
after the sed edits, because changes made by tidy can break the sed editing
rules.

Diffstat:
M convert_man.sh | 46 +++++++++++++++++++++++++++++-----------------
1 file changed, 29 insertions(+), 17 deletions(-)

diff --git a/convert_man.sh b/convert_man.sh
@@ -39,30 +39,42 @@ man_section=$(echo "${input}" \
   echo "<header>"
   echo " <h1>${man_name}(${man_section})</h1>";
   echo "</header>"
-  man2html -r "${input}" \
+
+  # First, we fix the input man to avoid unexpected html conversions. For
+  # example, the .P macros are not handled by man2html and the .PP macro followed
+  # by a .RE macro is badly converted while the reverse works, i.e. the .RE macro
+  # followed by the .PP macro.
+  #
+  # After html conversion, the output file is modified to remove its header and
+  # its footer (that are replaced by ours), and to fix some html patterns that
+  # are not correctly handled by tidy (see below)
+  sed 's/^\.P$/.PP/' ${input} \
+    | sed '/^\.PP$/{$!{N;s/\.PP\n\.RE/.RE\n.PP/;t sub;P;D;:sub}}' \
+    | man2html -r \
     | tail -n +10 \
     | sed '/^<HR>$/,$d' \
     | sed '/\&nbsp;/d' \
-    | sed 's/<DL COMPACT>/<DL>/g'
+    | sed 's/<DL COMPACT>/<DL>/g' \
+    | sed '/^<PRE>$/{$!{N;s/<PRE>\n<DL><DT><DD>/<DL><DT><DD><PRE>/;t sub;P;D;:sub}}'
   print_footer
 } > "${output}"
 
-# Clean the html file generated by man2html and lint it if necessary
-tidy --show-info no -m "${output}" > /dev/null 2>&1 || sh lint_html.sh "${output}"
-
 # Remove hyperlink on csplit, feh, gnuplot, sed, mmap and mpirun commands
-sed -i 's/<B><A HREF="\.\.\/man1\/csplit\.1\.html">csplit<\/A><\/B>/csplit/g' "${output}"
-sed -i 's/<B><A HREF="\.\.\/man1\/feh\.1\.html">feh<\/A><\/B>/feh/g' "${output}"
-sed -i 's/<B><A HREF="\.\.\/man1\/gnuplot\.1\.html">gnuplot<\/A><\/B>/gnuplot/g' "${output}"
-sed -i 's/<B><A HREF="\.\.\/man1\/sed\.1\.html">sed<\/A><\/B>/sed/g' "${output}"
-sed -i 's/<B><A HREF="\.\.\/man1\/mpirun\.1\.html">mpirun<\/A><\/B>/mpirun/g' "${output}"
-sed -i 's/<B><A HREF="\.\.\/man2\/mmap\.2\.html">mmap<\/A><\/B>/mmap/g' "${output}"
-
 # Fix the hyperlink toward the GPLv3+ license
-sed -i 's/gpl\.html\.">\(.*\).<\/A>/gpl.html">\1<\/A>./g' "${output}"
-
 # Remove unexpected link on sqrt(2)
-sed -i 's/<A HREF="\.\.\/man2\/sqrt\.2\.html">sqrt<\/A>/sqrt/g' "${output}"
-
 # Fix the man2html issues that translates the ' char in cq
-sed -i "s/cq\([a-z]\)\>/'\1/g" "${output}"
+sed\
+  -e 's/<B><A HREF="\.\.\/man1\/csplit\.1\.html">csplit<\/A><\/B>/csplit/g'\
+  -e 's/<B><A HREF="\.\.\/man1\/feh\.1\.html">feh<\/A><\/B>/feh/g'\
+  -e 's/<B><A HREF="\.\.\/man1\/gnuplot\.1\.html">gnuplot<\/A><\/B>/gnuplot/g'\
+  -e 's/<B><A HREF="\.\.\/man1\/sed\.1\.html">sed<\/A><\/B>/sed/g'\
+  -e 's/<B><A HREF="\.\.\/man1\/mpirun\.1\.html">mpirun<\/A><\/B>/mpirun/g'\
+  -e 's/<B><A HREF="\.\.\/man2\/mmap\.2\.html">mmap<\/A><\/B>/mmap/g'\
+  -e 's/gpl\.html\.">\(.*\).<\/A>/gpl.html">\1<\/A>./g'\
+  -e 's/<A HREF="\.\.\/man2\/sqrt\.2\.html">sqrt<\/A>/sqrt/g'\
+  -e "s/cq\([a-z]\)\>/'\1/g" <<EOF >"${output}"
+$(cat "${output}")
+EOF
+
+# Clean the html file generated by man2html and lint it if necessary
+tidy --show-info no -m "${output}" > /dev/null 2>&1 || sh lint_html.sh "${output}"