#!/bin/bash
-# VERSION: 20050112.0027
+# VERSION: 20080421.1623
#
# Compress (with bzip2 or gzip) all man pages in a hierarchy and
# update symlinks - By Marc Heerdink <marc @ koelkast.net>
# compression level, to parse the man.conf for all occurrences of MANPATH,
# to allow for a backup, to allow to keep the newest version of a page.
#
-# Modified 20040330 by Tushar Teredesai to replace $0 by the name of the
+# Modified 20040330 by Tushar Teredesai to replace $0 by the name of the
# script.
# (Note: It is assumed that the script is in the user's PATH)
#
# Modified 20050112 by Randy McMurchy to shorten line lengths and
# correct grammar errors.
#
+# Modified 20060128 by Alexander E. Patrakov for compatibility with Man-DB.
+#
+# Modified 20060311 by Archaic to use the Man-DB manpath utility, which
+# is a replacement for man --path from Man.
+#
+# Modified 20080421 by Dan Nicholson to properly execute the correct
+# compressdoc when working recursively. This means the same compressdoc
+# will be used whether a full path was given or it was resolved from PATH.
+#
+# Modified 20080421 by Dan Nicholson to be more robust with directories
+# that don't exist or don't have sufficient permissions.
+#
+# Modified 20080421 by Lars Bamberger to (sort of) automatically choose
+# a compression method based on the size of the manpage. A couple of bug
+# fixes were added by Dan Nicholson.
+#
+# Modified 20080421 by Dan Nicholson to suppress warnings from manpath
+# since these are emitted when $MANPATH is set. Removed the TODO for
+# using the $MANPATH variable since manpath(1) handles this already.
+#
# TODO:
# - choose a default compress method to be based on the available
# tool : gzip or bzip2;
-# - offer an option to automagically choose the best compression
-# methed on a per page basis (eg. check which of
-# gzip/bzip2/whatever is the most effective, page per page);
-# - when a MANPATH env var exists, use this instead of /etc/man.conf
-# (useful for users to (de)compress their man pages;
# - offer an option to restore a previous backup;
# - add other compression engines (compress, zip, etc?). Needed?
--gzip, --gz, -g
--bzip2, --bz2, -b
Compress using gzip or bzip2.
+ --automatic
+ Compress using either gzip or bzip2, depending on the
+ uncompressed size of the file. Files of 5 kB or more are
+ bzipped, files from 1 kB up to (but not including) 5 kB
+ are gzipped, and files smaller than 1 kB are not compressed.
--decompress, -d
Decompress the man pages.
--backup Specify a .tar backup shall be done for all directories.
- In case a backup already exists, it is saved as .tar.old
- prior to making the new backup. If a .tar.old backup
+ In case a backup already exists, it is saved as .tar.old
+ prior to making the new backup. If a .tar.old backup
exists, it is removed prior to saving the backup.
In backup mode, no other action is performed.
And where options are :
-1 to -9, --fast, --best
- The compression level, as accepted by gzip and bzip2.
- When not specified, uses the default compression level
- for the given method (-6 for gzip, and -9 for bzip2).
+ The compression level, as accepted by gzip and bzip2.
+ When not specified, uses the default compression level
+ for the given method (-6 for gzip, and -9 for bzip2).
Not used when in backup or decompress modes.
- --force, -F Force (re-)compression, even if the previous one was
- the same method. Useful when changing the compression
- ratio. By default, a page will not be re-compressed if
- it ends with the same suffix as the method adds
+ --force, -F Force (re-)compression, even if the previous one was
+ the same method. Useful when changing the compression
+ ratio. By default, a page will not be re-compressed if
+ it ends with the same suffix as the method adds
(.bz2 for bzip2, .gz for gzip).
- --soft, -S Change hard-links into soft-links. Use with _caution_
- as the first encountered file will be used as a
+ --soft, -S Change hard-links into soft-links. Use with _caution_
+ as the first encountered file will be used as a
reference. Not used when in backup mode.
- --hard, -H Change soft-links into hard-links. Not used when in
+ --hard, -H Change soft-links into hard-links. Not used when in
backup mode.
--conf=dir, --conf dir
- Specify the location of man.conf. Defaults to /etc.
+ Specify the location of man_db.conf. Defaults to /etc.
- --verbose, -v Verbose mode, print the name of the directory being
- processed. Double the flag to turn it even more verbose,
+ --verbose, -v Verbose mode, print the name of the directory being
+ processed. Double the flag to turn it even more verbose,
and to print the name of the file being processed.
- --fake, -f Fakes it. Print the actual parameters compman will use.
+ --fake, -f Fakes it. Print the actual parameters compressdoc will use.
- dirs A list of space-separated _absolute_ pathnames to the
- man directories. When empty, and only then, parse
- ${MAN_CONF}/man.conf for all occurrences of MANPATH.
+ dirs A list of space-separated _absolute_ pathnames to the
+ man directories. When empty, and only then, use manpath
+ to parse ${MAN_CONF}/man_db.conf for all valid occurrences
+ of MANDATORY_MANPATH.
Note about compression:
There has been a discussion on blfs-support about compression ratios of
both gzip and bzip2 on man pages, taking into account the hosting fs,
the architecture, etc... On the overall, the conclusion was that gzip
- was much more efficient on 'small' files, and bzip2 on 'big' files,
+ was much more efficient on 'small' files, and bzip2 on 'big' files,
small and big being very dependent on the content of the files.
- See the original post from Mickael A. Peters, titled
+ See the original post from Mickael A. Peters, titled
"Bootable Utility CD", dated 20030409.1816(+0200), and subsequent posts:
http://linuxfromscratch.org/pipermail/blfs-support/2003-April/038817.html
- On my system (x86, ext3), man pages were 35564KB before compression.
- gzip -9 compressed them down to 20372KB (57.28%), bzip2 -9 got down to
+ On my system (x86, ext3), man pages were 35564KB before compression.
+ gzip -9 compressed them down to 20372KB (57.28%), bzip2 -9 got down to
19812KB (55.71%). That is a 1.57% gain in space. YMMV.
- What was not taken into consideration was the decompression speed. But
- does it make sense to? You gain fast access with uncompressed man
- pages, or you gain space at the expense of a slight overhead in time.
+ What was not taken into consideration was the decompression speed. But
+ does it make sense to? You gain fast access with uncompressed man
+ pages, or you gain space at the expense of a slight overhead in time.
Well, my P4-2.5GHz does not even let me notice this... :-)
EOT
) | less
}
-# This function checks that the man page is unique amongst bzip2'd,
+# This function checks that the man page is unique amongst bzip2'd,
# gzip'd and uncompressed versions.
# $1 the directory in which the file resides
# $2 the file name for the man page
-# Returns 0 (true) if the file is the latest and must be taken care of,
-# and 1 (false) if the file is not the latest (and has therefore been
+# Returns 0 (true) if the file is the latest and must be taken care of,
+# and 1 (false) if the file is not the latest (and has therefore been
# deleted).
function check_unique ()
{
# Name of the script
MY_NAME=`basename $0`
-# OK, parse the command-line for arguments, and initialize to some
-# sensible state, that is: don't change links state, parse
-# /etc/man.conf, be most silent, search man.conf in /etc, and don't
+# OK, parse the command-line for arguments, and initialize to some
+# sensible state, that is: don't change links state, parse
+# /etc/man_db.conf, be most silent, search man_db.conf in /etc, and don't
# force (re-)compression.
COMP_METHOD=
COMP_SUF=
COMP_METHOD=$1
shift
;;
+ --automatic)
+ COMP_SUF=TBD
+ COMP_METHOD=$1
+ shift
+ ;;
--decompress|-d)
COMP_SUF=
COMP_LVL=
exit 1
;;
*)
- echo "\"$1\" is not an absolute path name"
+ echo "\"$1\" is not an absolute path name"
exit 1
;;
esac
;;
esac
-# Note: on my machine, 'man --path' gives /usr/share/man twice, once
+# Note: on my machine, 'man --path' gives /usr/share/man twice, once
# with a trailing '/', once without.
if [ -z "$MAN_DIR" ]; then
- MAN_DIR=`man --path -C "$MAN_CONF"/man.conf \
+ MAN_DIR=`manpath -q -C "$MAN_CONF"/man_db.conf \
| sed 's/:/\\n/g' \
| while read foo; do dirname "$foo"/.; done \
| sort -u \
| while read bar; do echo -n "$bar "; done`
fi
-# If no MANPATH in ${MAN_CONF}/man.conf, abort as well
+# If no MANDATORY_MANPATH in ${MAN_CONF}/man_db.conf, abort as well
if [ -z "$MAN_DIR" ]; then
- echo "No directory specified, and no directory found with \`man --path'"
+ echo "No directory specified, and no directory found with \`manpath'"
exit 1
fi
+# Check that the specified directories actually exist and are readable
+for DIR in $MAN_DIR; do
+ if [ ! -d "$DIR" -o ! -r "$DIR" ]; then
+ echo "Directory '$DIR' does not exist or is not readable"
+ exit 1
+ fi
+done
+
# Fake?
if [ "$FAKE" != "no" ]; then
echo "Actual parameters used:"
echo -n "Compression.......: "
case $COMP_METHOD in
--bzip2|--bz2|-b) echo -n "bzip2";;
- --gzip|__gz|-g) echo -n "gzip";;
+ --gzip|--gz|-g) echo -n "gzip";;
+ --automatic) echo -n "compressing";;
--decompress|-d) echo -n "decompressing";;
*) echo -n "unknown";;
esac
echo "Compression suffix: $COMP_SUF"
echo -n "Force compression.: "
[ "foo$FORCE_OPT" = "foo-F" ] && echo "yes" || echo "no"
- echo "man.conf is.......: ${MAN_CONF}/man.conf"
+ echo "man_db.conf is....: ${MAN_CONF}/man_db.conf"
echo -n "Hard-links........: "
- [ "foo$LN_OPT" = "foo-S" ] &&
+ [ "foo$LN_OPT" = "foo-S" ] &&
echo "convert to soft-links" || echo "leave as is"
echo -n "Soft-links........: "
- [ "foo$LN_OPT" = "foo-H" ] &&
+ [ "foo$LN_OPT" = "foo-H" ] &&
echo "convert to hard-links" || echo "leave as is"
echo "Backup............: $BACKUP"
echo "Faking (yes!).....: $FAKE"
if [ "$BACKUP" = "yes" ]; then
for DIR in $MAN_DIR; do
cd "${DIR}/.."
+ if [ ! -w "`pwd`" ]; then
+ echo "Directory '`pwd`' is not writable"
+ exit 1
+ fi
DIR_NAME=`basename "${DIR}"`
echo "Backing up $DIR..." > $DEST_FD0
[ -f "${DIR_NAME}.tar.old" ] && rm -f "${DIR_NAME}.tar.old"
- [ -f "${DIR_NAME}.tar" ] &&
+ [ -f "${DIR_NAME}.tar" ] &&
mv "${DIR_NAME}.tar" "${DIR_NAME}.tar.old"
- tar -cfv "${DIR_NAME}.tar" "${DIR_NAME}" > $DEST_FD1
+ tar -cvf "${DIR_NAME}.tar" "${DIR_NAME}" > $DEST_FD1
done
exit 0
fi
# I need to take into account the localized man, so I'm going recursive
for DIR in $MAN_DIR; do
MEM_DIR=`pwd`
+ if [ ! -w "$DIR" ]; then
+ echo "Directory '$DIR' is not writable"
+ exit 1
+ fi
cd "$DIR"
for FILE in *; do
# Fixes the case were the directory is empty
if [ "foo$FILE" = "foo*" ]; then continue; fi
# Fixes the case when hard-links see their compression scheme change
- # (from not compressed to compressed, or from bz2 to gz, or from gz
+ # (from not compressed to compressed, or from bz2 to gz, or from gz
# to bz2)
- # Also fixes the case when multiple version of the page are present,
+ # Also fixes the case when multiple versions of the page are present,
# which are either compressed or not.
if [ ! -L "$FILE" -a ! -e "$FILE" ]; then continue; fi
if [ "$FILE" = "whatis" ]; then continue; fi
if [ -d "$FILE" ]; then
- cd "${MEM_DIR}" # Go back to where we ran "$0",
- # in case "$0"=="./compressdoc" ...
# We are going recursive to that directory
echo "-> Entering ${DIR}/${FILE}..." > $DEST_FD0
# I need not pass --conf, as I specify the directory to work on
- # But I need exit in case of error
- "$MY_NAME" ${COMP_METHOD} ${COMP_LVL} ${LN_OPT} ${VERBOSE_OPT} \
- ${FORCE_OPT} "${DIR}/${FILE}" || exit 1
+ # But I need to exit in case of error. We must change back to the
+ # original directory so $0 is resolved correctly.
+ (cd "$MEM_DIR" && eval "$0" ${COMP_METHOD} ${COMP_LVL} ${LN_OPT} \
+ ${VERBOSE_OPT} ${FORCE_OPT} "${DIR}/${FILE}") || exit $?
echo "<- Leaving ${DIR}/${FILE}." > $DEST_FD1
- cd "$DIR" # Needed for the next iteration of the loop
else # !dir
if ! check_unique "$DIR" "$FILE"; then continue; fi
+ # With automatic compression, get the uncompressed size of the
+ # file (dereferencing symlinks), and choose an appropriate
+ # compression method.
+ if [ "$COMP_METHOD" = "--automatic" ]; then
+ declare -i SIZE
+ case "$FILE" in
+ *.bz2)
+ SIZE=$(bzcat "$FILE" | wc -c) ;;
+ *.gz)
+ SIZE=$(zcat "$FILE" | wc -c) ;;
+ *)
+ SIZE=$(wc -c < "$FILE") ;;
+ esac
+ if (( $SIZE >= (5 * 2**10) )); then
+ COMP_SUF=.bz2
+ elif (( $SIZE >= (1 * 2**10) )); then
+ COMP_SUF=.gz
+ else
+ COMP_SUF=
+ fi
+ fi
+
# Check if the file is already compressed with the specified method
BASE_FILE=`basename "$FILE" .gz`
BASE_FILE=`basename "$BASE_FILE" .bz2`
elif [ -f "$FILE" ]; then
# Take care of hard-links: build the list of files hard-linked
# to the one we are {de,}compressing.
- # NB. This is not optimum has the file will eventually be
- # compressed as many times it has hard-links. But for now,
+ # NB. This is not optimal, as the file will eventually be
+ # compressed as many times as it has hard-links. But for now,
# that's the safe way.
inode=`ls -li "$FILE" | awk '{print $1}'`
HLINKS=`find . \! -name "$FILE" -inum $inode`
fi
else
- # There is a problem when we get neither a symlink nor a plain
+ # There is a problem when we get neither a symlink nor a plain
# file. Obviously, we shall never ever come here... :-(
echo -n "Whaooo... \"${DIR}/${FILE}\" is neither a symlink "
echo "nor a plain file. Please check:"