#!/usr/local/bin/perl # Active Expire -- Detecting actively accessed news groups and setting # expire.ctl accordingly $progname = "activexp"; $version = "Ver 0.04, 30 September 1996"; $author = "Yufan Hu \n##\t\tPug Bainter "; # # Slightly modified by Alan Brown (alan@manawatu.planet.co.nz) 15 Dec 1994 # Massively modified by Pug Bainter 30 Sept 1996 # require 'getopts.pl'; # Usage: run activexp everyday before expire starts. You can put it # in the crontab to start it before news.daily. It is essential to # run activexp before the news.notices file is moved to OLD and # gets compressed. # Or: # activexp -l logfile # for initialising the active group database # The log file and expire.ctl file are assumed to be of INN 1.4 # Other options are: # -d turn on debugging # -m mail results to news user # -h do not keep a "hosts" log # -o override moderated newsgroup default check # # Copyright: Copyrighted by Yufan Hu , 1994 # # Disclaim: Use it at your own risk. # ############# Configuration starts here ######################## # The following should be changed according to your system setting up # $LastRead: the path name of the database to store the # last access time of group # $newslog: the log file of innd & nnrpd (news.notice) # $expire: the expire.ctl file path name # $hostslog: log of hosts with their last accessing time ############################################################### #$Prefix = "/news/" ; $Prefix = "./" ; $LogsPrefix = "/news/logs/" ; $LastRead = $LogsPrefix . "LastRead.log"; $expire = $Prefix . "expire.ctl"; $hostslog = $LogsPrefix . "Hosts.log"; $newslog = $LogsPrefix . "news.notice"; # $default is the number of days for articles to be kept if the coresponding # group is accessed (see expire.ctl for the meaning) # NB. The patterns specified manually at the beginning of the expire.ctl # take precidence # Keep 7 days for accessed groups $default = 7; # This is usually the entry in the second time field for A or U. # (Ie. *:U:0:3:10) $purge = 10; # Binary groups usually eat up more disk space. $bingroups is a perl # regexp of what we consider "binary" groups. This is actually any groups # we want to expire at this rate. The expire rate for them should be set # in the default section of the expire.ctl file. $bingroups = "binaries|pictures|warez|crack|alt\.sex"; # Junk groups usually don't need to be kept at all. $junk is a perl # regexp of what we consider junk groups. The expire rate for them # should be set in the default section of the expire.ctl file. $junk = "junk|control|test|flame"; # All the rest remain for this amount of time only. This is usually the # entry in the second time field for A or U. (Ie. *:U:0:3:10) $keeprest = 3; # The accessed group database has to be established before proper expiring # list being set up. $wait can be set to the number of days for collecting # accessed groups from the log file and establish the database # # You can cat together all the news.notices files stored in the OLD directory # and run activexp once with '-l' option to collect the accessed groups # then set $wait to 1 to start automatical tunning the next day # wait for 7 days before starting updating expire.ctl $wait = 7; # Flag to do host log info $do_hosts = 1; # Flag for moderated newsgroup check $moderated = 0; ################# Debug Only ################################# $debug = 0; ################################################################# # Anything below should not need to be changed do Getopts('l:dhmo'); $newslog = $opt_l if ($opt_l); $debug = 1 if ($opt_d); $do_hosts = 0 if ($opt_h); $moderated = 1 if ($opt_o); if($debug){ $warn .= "Logfile: $newslog\n"; } $mail = "Mail -s Active-Groups news"; $expire_new = "$expire.new"; # Seconds in a day $day = 3600 * 24; $keeptime = $default * $day; $wait = $wait * $day; # open the database recording the time the group was last read # access this database as an associated array dbmopen(LASTREAD, $LastRead, 0600); # open the news log file -- usually the news.notice open(LOG, $newslog); # get the current time $now = time; if($do_hosts) { # Initialising the hostslog array %hostslog = (); open(HOSTSLOG, $hostslog); while(){ if(/^([^ ]+):\s+(.*)$/){ $hostslog{$1} = $2; } } close(HOSTSLOG); } # scan through the log file, detecting any accessed groups within current day # and store the current time in the database while() { if(/^(.*\d\d:\d\d:\d\d).*nnrpd.*:\s+(.*)\s+group ([^\s]+)\s\d+/){ $group = $3; if( $debug && (! $LASTREAD{$group}) ){ $warn .= "Adding: $group\n"; } $LASTREAD{$group} = $now; $hostslog{$2} = "$1 $group"; } elsif(/^(.*\d\d:\d\d:\d\d).*nnrpd.*:\s+(.*)\s+no_permission/){ $hostslog{$2} = "$1 no_permission"; } } close(LOG); open(EXP, $expire); open(EXP_NEW, ">$expire_new"); $start = "### Actively Accessed Groups Start ###"; $end = "### Actively Accessed Groups End ###"; # skip over everything in the begining of the expire.ctl file @defined = (); while(){ if(!/$start/){ if(/^(.+):(.):(\d+|never):(\d+|never):(\d+|never)/){ $group = $1; if($group eq '*'){ if(($2 ne 'A' && !$moderated) || ($2 eq 'U' && $moderated)){ if(($3 != 1 && $3 != 0) || $4 != $keeprest){ print EXP_NEW "##$_"; print EXP_NEW "*:$2:1:$keeprest:$5\n"; next; } } } else { @groups = split(/,/,$group); if(($4 >= $default) || ($4 eq "never")){ if($group =~ /\./){ $group =~ s/\./\\./g; } if($group =~ /[\*\+\?]/){ $group =~ s/([\*\+\?])/.$1/g; } if ($debug) { print "@groups\n"; } push(@defined, split(/,/,$group)); } else { if ($debug) { print "skipping @groups define\n"; } } } } print EXP_NEW $_; } else { last; } } close(EXP); # the automatic expire configuration starts here ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime($now); $date = qx/date/; chop($date); $day = $wday; if($date =~ /^([A-Z][a-z]+)\s/){ $day = $1; } $start_token = "### ACTGROUP START TIME ###"; if( ! $LASTREAD{$start_token} ) { if ($debug) { print "Start time being set.\n"; } $LASTREAD{$start_token} = $now; } $start_time = $LASTREAD{$start_token}; require "ctime.pl"; chop($start_time_string = &ctime($start_time)); print EXP_NEW "$start ############################################################# ## Automatically generated by $progname ($version) ## Auther: $author ## Date: $date ## Started from:$start_time_string ## ## Do NOT change anything below this line. They are automatically ## generated by $progname ## ############################################################# "; $total = 0; # Scan over the database. If a group was accessed within the predefined # keeptime, then change the expire time for that group to the keeptime for $group ( sort (keys %LASTREAD) ) { ($group eq $start_token) && next; # if some group is specifically defined, skip it $def = "NOT"; for $d ( @defined ){ if($group =~ /^$d/){ if($debug){ $d1 = $d; if($d1 =~ /\.[*+?]/){ $d1 =~ s/\.([*+?])/$1/g; } $warn .= "Drop: $group, implied by $d1\n"; } $def = "Yes"; } if( $def eq "Yes" ) { last;} } if( $def eq "Yes" ) { next; } $diff = $now - $LASTREAD{$group}; if($diff <= $keeptime){ if(($group !~ /$bingroups/) && ($group !~ /$junk/)) { print EXP_NEW "$group:A:0:$default:$purge\n"; } else { if ($debug) { $warn .= "Drop: $group, binary or junk group\n"; } } $total++; } else { if($debug){ $warn .= "Drop: $group, inactive for $default days\n"; } delete $LASTREAD{$group}; } } # get the number of entries in the database $groups = (keys %LASTREAD) - 1; print EXP_NEW "## Total: $total/$groups groups processed\n"; print EXP_NEW "\n$end\n## Do not put anything after this line ###\n"; close(EXP_NEW); dbmclose(LASTREAD); if($debug){ $warn .= "## Total: $total/$groups groups processed\n"; } if ($opt_m) { # mail the result to the news owner system "$mail < $expire_new"; } if($now - $start_time > $wait && ! $debug){ system "mv $expire $expire.$day; mv $expire_new $expire"; } else { $warn .= "Collecting group information only, $expire intacted.\n"; $warn .= "Expire list is in $expire.new\n"; } if ($do_hosts) { # flush the hostslog file system "mv -f $hostslog $hostslog.$day"; open(HOSTSLOG, ">$hostslog"); $hosts = (keys %hostslog); if($debug){ $warn .= "\nHost access log: $hosts hosts\n\n";} foreach $host ( sort keys %hostslog ){ printf HOSTSLOG "%-25s $hostslog{$host}\n", "$host:"; if($debug){ $warn .= sprintf("%-25s $hostslog{$host}\n", "$host:"); } } $warn .= "\n"; } if($warn){ print "\nMessage from $progname:\n$warn";} exit; ########### Change Log #################################### # # 8/Aug/94: # Bug: reported by hagberg@mail.med.cornell.edu: # when something like '*binaries*' appears in the expire.ctl # actgroups will complain because it is not a legal regexp # for perl # fix: change the '*','+','?' appear in the pattern to '.*','.+' # and '.+' before compare with the group names. # 11/Aug/94: # Add: Hosts.log file to log the hosts accessing our server and its # last access time # 17/Aug/94: # Add: delete entries from the database if the groups are not active # for more than $default days # # 18/Aug/94:(Ver 0.03b, 18 August 1994) # Bug: reported by Tim Rosmus : # if the expire line contains never, that line was not got into # the @defined array. Fixed. # Add: Three more options (patches from tim@mdd.comm.mot.com) # -d, -h, -m # 22/Aug/94: # Bug: reported by Andrew Macpherson : # de.*:A:1:2:12 will cause demon.* to be dropped # 23/Aug/94: # Add: $prefix for the name of directory of expire.ctl & others # Suggested: Andrew Macpherson # 30/Aug/94: # Chg: suggested by Alan Brown # Do not touch the purge part of the line # *:A:1:1:45 # so that things which have expire date set will stay longer # (such as FAQs). # # 15/Dec/94 # Chg: (Alan Brown) # Do not touch the line # *:A:0:0:0 # while still processing # *:M:1:1:45 # *:U:1:1:45 # This allow INN to purge the contents of removed groups # immediately instead of having them expire slowly. # # # 30/Sep/96 # Chg: Pug # The name of the script from actgroups.pl to activexp.pl. # Chg: Pug # The behavior when checking the default sections of the # the old expire.ctl. Only add to the defined list if # the default keep is the same or larger than $default. # Check for 0 or 1 instead of just 1. (If somone wants # their message to expire that quick, so be it.) # Del: Pug # $bindefault due to notes for $bingroups and $junk. # Add: Pug # The $bingroups and $junk. Read notes there as well. # Add: Pug # $LogsPrefix # Add: Pug # More debugging statements. # Add: Pug # -o flag to override moderated groups default check. # All this does is change the check in the definition # section from ne 'A' to eq 'U'. # Add: Pug # Do the right thing with commas in the default section. # Add: Pug # Add comments regarding purge and keeprest.