A PERL script to check "junk" for newsgroups

Dave Glowacki dglo at ADS.COM
Thu Feb 14 10:25:39 AEST 1991


Since, as a rule, EVERY C or shell program posted must be followed up
by a PERL script, here's my version of NEWJUNK.

Mine is called 'check-junk'.  It grabs the Newsgroups: lines from all
junked articles, processes them according to a couple of configuration
files and either mails a report to any addresses specified on the command
line or prints the report to stdout (if there weren't any arguments.)

The two configuration files are lists of patterns.  The first list
(junk-trash-list) throws away the entire Newsgroups: line for an article
if a pattern from it matches any of the newsgroups in the line.  The
second list (junk-ignore-list) only ignores the newsgroup matched by
a particular pattern.

I use 'junk-trash-list' to throw away references to regional newsgroups
like 'sub', 'dnet', and 'ne' where things tend to be crossposted to
both a regional group and a local group.  'Junk-ignore-list' is more of
a specific newsgroup/hierarchy eliminator for things like 'alt.sex.*' or
'alt.desert.storm.its.not.scud.its.al-hussein.dammit'.

To install this, stick everything in /usr/lib/news (or wherever you
put these things) and make sure NEWSCTL and NEWSARTS are set correctly.
I run it with '/usr/lib/news/check-junk news@ads.com' every night
before I expire.

#! /bin/sh
# This is a shell archive.  Remove anything before this line, then feed it
# into a shell via "sh file" or similar.  To overwrite existing files,
# type "sh file -c".
# The tool that generated this appeared in the comp.sources.unix newsgroup;
# send mail to comp-sources-unix at uunet.uu.net if you want that tool.
# If this archive is complete, you will see the following message at the end:
#		"End of shell archive."
# Contents:  check-junk junk-ignore-list junk-trash-list
# Wrapped by dglo at saturn on Wed Feb 13 14:36:09 1991
PATH=/bin:/usr/bin:/usr/ucb ; export PATH
# Extract 'check-junk' unless it already exists; passing "-c" as the first
# argument forces an overwrite.  Two simple tests joined with && are used
# instead of one "test ... -a ..." expression, which can be mis-parsed when
# "${1}" happens to look like a test operator.
if test -f 'check-junk' && test "${1}" != "-c" ; then
  echo shar: Will not clobber existing file \"'check-junk'\"
else
echo shar: Extracting \"'check-junk'\" \(4127 characters\)
# The here-document below is the file payload; sed strips the leading "X"
# from every line.  The unpacked size is verified afterwards, so the payload
# has to stay byte-for-byte intact.
sed "s/^X//" >'check-junk' <<'END_OF_FILE'
X#!/usr/local/bin/perl
X#
X# Build a list of all newsgroups sent to 'junk' newsgroup
X#    and either mail the report to the addresses listed on the command line
X#        or print it to STDOUT
X#
X# The report is a series of lines of the form 'nnn articles for newsgroup'
X#
X# $Header: /var/news/src/ADS-scripts/RCS/check-junk,v 1.7 1991/02/13 22:31:08 dglo Exp $
X
X# subroutine to read in the C news environment
X#
X$NEWSCONFIG='/usr/lib/news/bin/config';
X%NEWSENV = ();
Xsub newsconfig {
X  if (open(NEWSENV, "sh -x $NEWSCONFIG 2>&1  |")) {
X    while (<NEWSENV>) {
X      $NEWSENV{$1} = $2 if (/(.*)=(.*)\n/);
X    }
X    close(NEWSENV);
X    1;
X  } else {
X    0;
X  }
X}
X
X# News locations (handle both C news and B news)
X#
Xif (&newsconfig()) {
X  $NEWSCTL = $NEWSENV{'NEWSCTL'};
X  $NEWSARTS = $NEWSENV{'NEWSARTS'};
X} else {
X  $NEWSCTL="/usr/lib/news";
X  $NEWSARTS="/usr/spool/news";
X}
X
X# see if the result is supposed to be mailed somewhere
X#
X$maillist = '';
Xwhile (@ARGV > 0) {
X  $_ = pop(ARGV);
X  $maillist .= ' ' . $_;
X}
X
X# either write to a temp file (to be possibly mailed) or to STDOUT
X#
Xif ($maillist) {
X  $tmpfile = "/tmp/junkmail.$$";
X  open(TMPFILE, ">$tmpfile") || die "Can't open a temporary file!\n";
X} else {
X  open(TMPFILE, ">-") || die "Couldn't send output to STDOUT!\n";
X  select(TMPFILE); $| = 1; select(STDOUT);
X}
X
X# read in list of patterns for which entire Newsgroups line is trashed
X#
X@trashlist = ();
Xif ( -e "$NEWSCTL/junk-trash-list" ) {
X  if (open(LIST, "$NEWSCTL/junk-trash-list")) {
X    while (<LIST>) {
X      chop;
X      push(trashlist, $_);
X    }
X    close(LIST);
X  } else {
X    print TMPFILE "Couldn't open '$NEWSCTL/junk-trash-list'!\n";
X  }
X} else {
X  print TMPFILE "Couldn't find '$NEWSCTL/junk-trash-list'!\n";
X}
X
X# read in list of patterns to ignore
X#
X@ignorelist = ();
Xif ( -e "$NEWSCTL/junk-ignore-list" ) {
X  if (open(LIST, "$NEWSCTL/junk-ignore-list")) {
X    while (<LIST>) {
X      chop;
X      push(ignorelist, $_);
X    }
X    close(LIST);
X  } else {
X    print TMPFILE "Couldn't open '$NEWSCTL/junk-ignore-list'!\n";
X  }
X} else {
X  print TMPFILE "Couldn't find '$NEWSCTL/junk-ignore-list'!\n";
X}
X
X# read in list of good newsgroups
X#
X%newsgroup = ();
Xif ( -e "$NEWSCTL/active") {
X  if (open(ACTIVE, "$NEWSCTL/active")) {
X    while (<ACTIVE>) {
X      s/ .*\n//;
X      $newsgroup{$_} = 1;
X    }
X    close(ACTIVE);
X  } else {
X    print TMPFILE "Couldn't open '$NEWSCTL/active'!\n";
X  }
X} else {
X  print TMPFILE "Couldn't find '$NEWSCTL/active'!\n";
X}
X
Xopen(JUNKNG, "grep '^Newsgroups:' $NEWSARTS/junk/* |") ||
X	die "Couldn't search for Newsgroups in articles in 'junk'!\n";
Xwhile (<JUNKNG>) {
X  chop;
X
X  # get the list of newsgroups
X  #
X  s/^.*:Newsgroups: //;
X  s/\s*//g;
X  $list = $_;
X
X  # see if we should trash this line
X  #
X  foreach $_ (split(/,/, $list)) {
X    foreach $i (@trashlist) {
X      if (/$i/) {
X	$list = '';
X	last;
X      }
X    }
X  }
X
X  # Check each newsgroup on the line
X  #
X  foreach $_ (split(/,/, $list)) {
X
X    # if it doesn't already exist...
X    #
X    if ($newsgroup{$_}) {
X      $unignored = 0;
X    } else {
X
X      # see if it's one we WANT to junk
X      #
X      $unignored = 1;
X      foreach $i (@ignorelist) {
X	if (/$i/) {
X	  $unignored = 0;
X	  last;
X	}
X      }
X    }
X
X    # found one we may want to keep
X    #
X    if ($unignored) {
X      $allng{$_}++;
X      if ($list ne $_) {
X	if (defined($thislist{$_})) {
X	  $thislist{$_} .= ':' . $list;
X	} else {
X	  $thislist{$_} = $list;
X	}
X      }
X    }
X  }
X}
X
X# routine to sort the list of junked newsgroups
X#
Xsub nogood {
X  local($result);
X
X  $result = $allng{$b} - $allng{$a};
X  return $result if ($result);
X
X  if ($a lt $b) {
X    return -1;
X  } elsif ($a gt $b) {
X    return 1;
X  }
X  return 0;
X}
X
X# print the report
X#
Xforeach $i (sort nogood keys(allng)) {
X  $plural = ($allng{$i} == 1 ? " " : "s");
X  print TMPFILE $allng{$i}," article",$plural," for ",$i;
X  print TMPFILE " (",$thislist{$i},")" if (defined($thislist{$i}));
X  print TMPFILE "\n";
X}
Xclose(TMPFILE);
X
X# mail the report (if there's something to mail)
X#
Xif ($maillist) {
X  system "Mail -s 'Junked newsgroups' $maillist < $tmpfile" if ( -s $tmpfile );
X  unlink $tmpfile;
X}
END_OF_FILE
# Integrity check: warn when the unpacked script is not the 4127 bytes the
# archive was built with.
if test 4127 -ne `wc -c <'check-junk'`; then
    echo shar: \"'check-junk'\" unpacked with wrong size!
fi
# The payload is a script, so make it executable.
chmod +x 'check-junk'
# end of 'check-junk'
fi
# Extract 'junk-ignore-list' (the patterns check-junk uses to ignore
# individual newsgroups) unless it already exists; passing "-c" as the first
# argument forces an overwrite.  Two simple tests joined with && are used
# instead of one "test ... -a ..." expression, which can be mis-parsed when
# "${1}" happens to look like a test operator.
if test -f 'junk-ignore-list' && test "${1}" != "-c" ; then
  echo shar: Will not clobber existing file \"'junk-ignore-list'\"
else
echo shar: Extracting \"'junk-ignore-list'\" \(99 characters\)
# The here-document is the file payload; sed strips the leading "X" from
# every line.  The unpacked size is verified afterwards, so the payload has
# to stay byte-for-byte intact.
sed "s/^X//" >'junk-ignore-list' <<'END_OF_FILE'
X^alt\.desert\.storm\.its\.*
X^alt\.drugs
X^alt\.sex.*
X^erg\..*
X^eunet\..*
X^eucon\..*
X^la\..*
X^to\..*
END_OF_FILE
# Integrity check: warn when the unpacked list is not the 99 bytes the
# archive was built with.
if test 99 -ne `wc -c <'junk-ignore-list'`; then
    echo shar: \"'junk-ignore-list'\" unpacked with wrong size!
fi
# end of 'junk-ignore-list'
fi
# Extract 'junk-trash-list' (the patterns check-junk uses to discard an
# article's entire Newsgroups: line) unless it already exists; passing "-c"
# as the first argument forces an overwrite.  Two simple tests joined with &&
# are used instead of one "test ... -a ..." expression, which can be
# mis-parsed when "${1}" happens to look like a test operator.
if test -f 'junk-trash-list' && test "${1}" != "-c" ; then
  echo shar: Will not clobber existing file \"'junk-trash-list'\"
else
echo shar: Extracting \"'junk-trash-list'\" \(37 characters\)
# The here-document is the file payload; sed strips the leading "X" from
# every line.  The unpacked size is verified afterwards, so the payload has
# to stay byte-for-byte intact.
sed "s/^X//" >'junk-trash-list' <<'END_OF_FILE'
X^dnet\..*
X^ne\..*
X^sub\..*
X^znet\..*
END_OF_FILE
# Integrity check: warn when the unpacked list is not the 37 bytes the
# archive was built with.
if test 37 -ne `wc -c <'junk-trash-list'`; then
    echo shar: \"'junk-trash-list'\" unpacked with wrong size!
fi
# end of 'junk-trash-list'
fi
echo shar: End of shell archive.
exit 0
--
Dave Glowacki          dglo at ads.com          Advanced Decision Systems



More information about the Alt.sources mailing list