commit f7f0846dfdf7d8f8e4860db7e313df4818e36daf
parent aa3b7008c448d75d3265bb7735d33b88a0e64085
Author: Vincent Forest <vincent.forest@meso-star.com>
Date: Wed, 8 Feb 2023 16:44:57 +0100
Fix the convert_man.sh script
Man pages generated by the scdoc program are not converted correctly by
man2html. This commit adds pre- and post-processing steps to solve these
problems. In addition, we move the post-processing performed by tidy
after sed because changes made by tidy can break the sed editing rules.
Diffstat:
1 file changed, 29 insertions(+), 17 deletions(-)
diff --git a/convert_man.sh b/convert_man.sh
@@ -39,30 +39,42 @@ man_section=$(echo "${input}" \
echo "<header>"
echo " <h1>${man_name}(${man_section})</h1>";
echo "</header>"
- man2html -r "${input}" \
+
+ # First, we fix the input man page to avoid unexpected html conversions. For
+ # example, the .P macros are not handled by man2html and the .PP macro followed
+ # by a .RE macro is badly converted while the reverse works, i.e. the .RE macro
+ # followed by the .PP macro.
+ #
+ # After html conversion, the output file is modified to remove its header and
+ # its footer (that are replaced by ours), and to fix some html patterns that
+ # are not correctly handled by tidy (see below)
+ sed 's/^\.P$/.PP/' "${input}" \
+ | sed '/^\.PP$/{$!{N;s/\.PP\n\.RE/.RE\n.PP/;t sub;P;D;:sub}}' \
+ | man2html -r \
| tail -n +10 \
| sed '/^<HR>$/,$d' \
| sed '/\ /d' \
- | sed 's/<DL COMPACT>/<DL>/g'
+ | sed 's/<DL COMPACT>/<DL>/g' \
+ | sed '/^<PRE>$/{$!{N;s/<PRE>\n<DL><DT><DD>/<DL><DT><DD><PRE>/;t sub;P;D;:sub}}'
print_footer
} > "${output}"
-# Clean the html file generated by man2html and lint it if necessary
-tidy --show-info no -m "${output}" > /dev/null 2>&1 || sh lint_html.sh "${output}"
-
# Remove hyperlink on csplit, feh, gnuplot, sed, mmap and mpirun commands
-sed -i 's/<B><A HREF="\.\.\/man1\/csplit\.1\.html">csplit<\/A><\/B>/csplit/g' "${output}"
-sed -i 's/<B><A HREF="\.\.\/man1\/feh\.1\.html">feh<\/A><\/B>/feh/g' "${output}"
-sed -i 's/<B><A HREF="\.\.\/man1\/gnuplot\.1\.html">gnuplot<\/A><\/B>/gnuplot/g' "${output}"
-sed -i 's/<B><A HREF="\.\.\/man1\/sed\.1\.html">sed<\/A><\/B>/sed/g' "${output}"
-sed -i 's/<B><A HREF="\.\.\/man1\/mpirun\.1\.html">mpirun<\/A><\/B>/mpirun/g' "${output}"
-sed -i 's/<B><A HREF="\.\.\/man2\/mmap\.2\.html">mmap<\/A><\/B>/mmap/g' "${output}"
-
# Fix the hyperlink toward the GPLv3+ license
-sed -i 's/gpl\.html\.">\(.*\).<\/A>/gpl.html">\1<\/A>./g' "${output}"
-
# Remove unexpected link on sqrt(2)
-sed -i 's/<A HREF="\.\.\/man2\/sqrt\.2\.html">sqrt<\/A>/sqrt/g' "${output}"
-
# Fix the man2html issues that translates the ' char in cq
-sed -i "s/cq\([a-z]\)\>/'\1/g" "${output}"
+sed\
+ -e 's/<B><A HREF="\.\.\/man1\/csplit\.1\.html">csplit<\/A><\/B>/csplit/g'\
+ -e 's/<B><A HREF="\.\.\/man1\/feh\.1\.html">feh<\/A><\/B>/feh/g'\
+ -e 's/<B><A HREF="\.\.\/man1\/gnuplot\.1\.html">gnuplot<\/A><\/B>/gnuplot/g'\
+ -e 's/<B><A HREF="\.\.\/man1\/sed\.1\.html">sed<\/A><\/B>/sed/g'\
+ -e 's/<B><A HREF="\.\.\/man1\/mpirun\.1\.html">mpirun<\/A><\/B>/mpirun/g'\
+ -e 's/<B><A HREF="\.\.\/man2\/mmap\.2\.html">mmap<\/A><\/B>/mmap/g'\
+ -e 's/gpl\.html\.">\(.*\).<\/A>/gpl.html">\1<\/A>./g'\
+ -e 's/<A HREF="\.\.\/man2\/sqrt\.2\.html">sqrt<\/A>/sqrt/g'\
+ -e "s/cq\([a-z]\)\>/'\1/g" <<EOF >"${output}"
+$(cat "${output}")
+EOF
+
+# Clean the html file generated by man2html and lint it if necessary
+tidy --show-info no -m "${output}" > /dev/null 2>&1 || sh lint_html.sh "${output}"