: '@(#) ngsizes 1.7 93/08/18 00:09:10'
#
# ngsizes - Generate disk usage summary for USENET newsgroups.
#
# Copyright 1990-1993, Unicom Systems Development.  All rights reserved.
# See accompanying README file for terms of distribution and use.
#
# Usage:
#
#   ngsizes [-D] [-b breakdown_list] [-t threshold]
#
#   -t  Specifies that only groups using "threshold" or more disk blocks
#       should be reported.  The default is defined by the
#       "DFLT_THRESHOLD" parameter below.
#
#   -b  Specifies how usage should be broken down versus age.  For example,
#       saying "-b 0,7,14" will report usage in three columns: the total
#       usage, the usage by articles a week or older, and the usage by
#       articles two weeks or older.  The default is defined by the
#       "DFLT_BREAKDOWN" parameter below.
#
#   -D  For debugging: the temporary files are kept instead of being
#       removed when the script finishes.
#

# One-line synopsis printed on stderr whenever the command line is rejected.
# It lists every option the script accepts, including the -D debugging flag.
USAGE="usage: $0 [-D] [-b breakdown_list] [-t threshold]"


##############################################################################
#
# Site-specific definitions.
#

SPOOLDIR=/usenet/spool/news     # Pathname to the Usenet spool directory.
ACTIVE=/usenet/lib/news/active  # Pathname to the list of active newsgroups.
DU=du                           # Pathname to the enhanced "du" command.

#
# Pick one of the following.  It specifies how to account for the disk
# space used by cross-posted articles.
#
DU_LINKOPTS=-l    # Count xposted article every time it appears.
#DU_LINKOPTS=     # Count xposted article only first time it is encountered.
#DU_LINKOPTS=-L   # Average usage across newsgroups in which it appears.

#
# Pick one of the following.  It specifies how you want usage reported.
#
DU_BSIZE=-k       # Report usage in KB.
#DU_BSIZE=        # Report usage however du normally does.

#
# Specify the default reporting threshold.  Newsgroups with usage below
# this value will not be reported.  The units for this number depend upon
# whatever you specified above for DU_BSIZE.  This default can be overridden
# by the "-t" command line option.
#
DFLT_THRESHOLD=0

#
# Specify the default for the breakdown.  There will be one column in
# the output for every number in the breakdown list.  Each value in the
# list specifies a number of days, and the corresponding column will show
# the disk usage by articles that are that many days or older.  This default
# can be overridden by the "-b" command line option.
#
DFLT_BREAKDOWN=0,1,3,5,7,15

#
# Work Files:
#   $TMP.read   Readership statistics.
#   $TMP.ngs    List of all newsgroups to check.
#   $TMP.du     Disk usage for all directories in the news spool dir.
#
# TMP is a pathname prefix, not a directory.  The prefix lives inside a
# private directory made by mktemp, so the work files cannot collide with
# (or be pre-planted as) a guessable name in a shared temporary directory.
#
NGSZ_WORKDIR=$(mktemp -d "${TMPDIR:-/tmp}/ngsz.XXXXXX") || {
  echo "$0: cannot create a temporary work directory" 1>&2
  exit 1
}
TMP=$NGSZ_WORKDIR/ngsz

# Remove the private directory on every exit path.  The signal trap only
# has to call "exit"; that in turn fires the exit (0) trap.
trap 'rm -rf "$NGSZ_WORKDIR"' 0
trap 'exit 1' 1 2 3 15

#
# End of site-specific customizations.
#
##############################################################################


#
# Initialize.
#
debug=0
threshold=$DFLT_THRESHOLD
breakdown=$DFLT_BREAKDOWN

#
# Crack the command line options.
#
# The getopts builtin parses "$@" directly, so option arguments are never
# re-split by the shell and a bad option is always detected.  Values given
# on the command line are checked before use: the breakdown must be a
# comma-separated list of whole numbers and the threshold a whole number.
#
while getopts 'Db:t:' opt ; do
  case "$opt" in
    D)
      # Debug mode: use a fixed work-file prefix so the files survive the
      # run and can be reused by the next one, and ignore hangup/interrupt/
      # quit so a signal cannot trigger the cleanup.
      TMP=/tmp/ngsz
      debug=1
      trap '' 1 2 3
      ;;
    b)
      case "$OPTARG" in
        '' | *[!0-9,]* | ,* | *, | *,,*)
          echo "$USAGE" 1>&2
          exit 1
          ;;
      esac
      breakdown=$OPTARG
      ;;
    t)
      case "$OPTARG" in
        '' | *[!0-9]*)
          echo "$USAGE" 1>&2
          exit 1
          ;;
      esac
      threshold=$OPTARG
      ;;
    *)
      # Unknown option or missing option argument; getopts has already
      # printed its own diagnostic.
      echo "$USAGE" 1>&2
      exit 1
      ;;
  esac
done
shift $((OPTIND - 1))

# The script takes no operands.
if [ $# -ne 0 ] ; then
  echo "$USAGE" 1>&2
  exit 1
fi

#
# Verify we can find the active file.
#
# "$ACTIVE" is quoted so that an empty or blank-containing value is still
# tested as one pathname; unquoted, an empty value collapses the test to
# "[ ! -r ]", which is false, and the check would be silently skipped.
#
if [ ! -r "$ACTIVE" ] ; then
  echo "$0: file '$ACTIVE' not found or unreadable" 1>&2
  if [ "$debug" -eq 0 ] ; then
    # ${TMP:?} aborts rather than expanding to ".*" if TMP is ever unset.
    rm -f "${TMP:?}".*
  fi
  exit 1
fi

#
# Get a count of the readers for each newsgroup.
# Output format will be "readership_count newsgroup_name"
#

#
# newsrc_groups PASSWD_FILE
#   For every distinct home directory (6th colon-separated field) in
#   PASSWD_FILE that holds a regular file ".newsrc", print the part before
#   the first ":" of each line of that file that contains a ":".
#   Pathnames are read one per line, so a home directory containing blanks
#   or wildcard characters is handled as a single name.
#
newsrc_groups() {
  awk -F: '{ print $6 "/.newsrc" }' "$1" \
    | sort -u \
    | while IFS= read -r newsrc ; do
        if [ -f "$newsrc" ] ; then
          sed -n 's/:.*//p' "$newsrc"
        fi
      done
}

newsrc_groups /etc/passwd | sort | uniq -c > "$TMP.read"

#
# Build a sorted list of all known newsgroups from the active file.
#

#
# active_groups FILE
#   Print the first field of every line of FILE, sorted with duplicates
#   removed.  Everything from the first space or tab onward is dropped, and
#   lines left empty by that (blank lines, lines starting with whitespace)
#   are discarded.
#
active_groups() {
  sed \
    -e 's/[[:space:]].*//' \
    -e '/^$/d' \
    "$1" \
    | sort -u
}

active_groups "$ACTIVE" > "$TMP.ngs"

#
# Scan the spool directory for disk usage.  Convert the newsgroup pathname
# to a newsgroup name, and move it to the first field on the line.
# Output format will be "newsgroup_name usage usage ..."
#

#
# du_to_records SPOOL_PREFIX
#   Filter: reads lines of the form "usage [usage ...] pathname" on stdin.
#   Moves the last whitespace-delimited field (the pathname) to the front,
#   removes the first occurrence of "SPOOL_PREFIX/", turns every "/" into
#   ".", and writes the result sorted with duplicates removed.
#   The column separator before the pathname may be a space or a tab.
#   SPOOL_PREFIX is pasted into a sed pattern delimited by "!", so it must
#   not contain "!" (regular-expression characters in it are not escaped).
#
du_to_records() {
  sed \
    -e 's/^\(.*\)[[:space:]]\([^[:space:]]*\)$/\2 \1/' \
    -e "s!$1/!!" \
    -e 's!/!.!g' \
    | sort -u
}

if [ "$debug" -ne 0 ] && [ -f "$TMP.du" ] ; then
  : suppress scan for debugging
else
  # $DU, $DU_LINKOPTS and $DU_BSIZE are left unquoted on purpose: the two
  # option variables may be configured empty and must then vanish from the
  # command line instead of becoming empty arguments.
  $DU -ir $DU_LINKOPTS $DU_BSIZE -c "$breakdown" "$SPOOLDIR" \
    | du_to_records "$SPOOLDIR" \
    > "$TMP.du"
fi

#
# Generate the report.
#

#
# format_report
#   Filter: turns tagged records read from stdin into the usage table.
#
#   Input records, one per line:
#     BREAKDOWN n1 n2 ...     ages (in days) that head the usage columns
#     THRESHOLD n             list only groups whose first usage value >= n
#     READERS n ng            newsgroup "ng" has "n" readers
#     NEWSGROUP ng n1 n2 ...  usage of "ng", one value per BREAKDOWN column
#
#   The table is written to stdout.  Any other input line is reported on
#   stderr.  The awk program is single-quoted for the shell, so it must not
#   contain an apostrophe; the message below spells it as octal \047.
#
format_report() {
  awk '

    BEGIN {
      LINE_WIDTH = 79       # maximum length of a line
      NG_WIDTH = 26         # width of field to print newsgroup in
      READR_WIDTH = 4       # width of field to print number of readers in
      MAX_FIELD_WIDTH = 8   # widest a usage column may be
      FRONT_FMT = "%-" NG_WIDTH "." NG_WIDTH "s" "%" READR_WIDTH "s"
    }

    #
    # Record "BREAKDOWN n1 n2 ..."
    # Defines the format for the newsgroup usage lines.  Each "n"
    # corresponds to one column in the newsgroup usage line, and specifies
    # the age of articles which consume this amount of disk space.
    #
    $1 == "BREAKDOWN" {
      num_breakdn = NF - 1
      FIELD_WIDTH = MAX_FIELD_WIDTH
      if ( num_breakdn > 0 ) {
        # Share what is left of the line between the columns.  int() keeps
        # the width a whole number so the format reads "%6s", not "%6.125s".
        FIELD_WIDTH = int( (LINE_WIDTH - (NG_WIDTH + READR_WIDTH)) / num_breakdn )
        if ( FIELD_WIDTH > MAX_FIELD_WIDTH )
          FIELD_WIDTH = MAX_FIELD_WIDTH
        if ( FIELD_WIDTH < 1 )
          FIELD_WIDTH = 1
      }
      FIELD_FMT = "%" FIELD_WIDTH "s"
      printf(FRONT_FMT, "newsgroup", "read")
      for ( i = 0 ; i < num_breakdn ; ++i )
        printf(FIELD_FMT, sprintf("%ddays", $(i+2)))
      printf("\n")
      next
    }

    #
    # Record "THRESHOLD n"
    # Indicates we only want to see newsgroups using "n" or more blocks.
    #
    $1 == "THRESHOLD" {
      threshold = $2 + 0
      next
    }

    #
    # Record "READERS n ng"
    # Indicates that newsgroup "ng" has "n" readers.
    #
    $1 == "READERS" {
      num_readers[$3] = $2
      next
    }

    #
    # Record "NEWSGROUP ng n1 n2 ..."
    # Indicates the disk usage of newsgroup "ng".  Each "n" specifies the
    # diskspace used by articles "ndays" or older, where "ndays" is defined
    # by the BREAKDOWN record.
    #
    $1 == "NEWSGROUP" {
      if ( ($3 + 0) >= threshold ) {
        if ( !($2 in num_readers) )
          num_readers[$2] = 0
        printf(FRONT_FMT, $2, num_readers[$2])
        for ( i = 0 ; i < num_breakdn ; ++i )
          printf(FIELD_FMT, $(i+3))
        printf("\n")
      }
      next
    }

    {
      printf("ngsizes - bad line \047%s\047\n", $0) | "cat 1>&2"
    }

  '
}

#
# Feed the formatter: column definitions and threshold first, then the
# readership counts, then the usage of every group that is both on disk and
# in the active list, largest first-column usage first.
#
(
  printf '%s\n' "BREAKDOWN $breakdown" | sed 's/,/ /g'
  printf '%s\n' "THRESHOLD $threshold"
  sed 's/^/READERS /' "$TMP.read"
  # "-k 2" is the current spelling of the obsolete "+1" key (skip one field).
  join "$TMP.du" "$TMP.ngs" | sort -rn -k 2 | sed 's/^/NEWSGROUP /'
) | format_report

# Remove the work files unless -D asked for them to be kept.
# ${TMP:?} aborts rather than expanding to ".*" if TMP is ever unset.
if [ "$debug" -eq 0 ] ; then
  rm -f "${TMP:?}".*
fi
exit 0