news.utdallas.edu!tamsun.tamu.edu!cs.utexas.edu!zaphod.mps.ohio-state.edu!uwm.edu!ux1.cso.uiuc.edu!news.cso.uiuc.edu!ekatz Mon Mar 22 15:42:22 CST 1993
Article: 1519 of comp.infosystems.gopher
Xref: feenix.metronet.com comp.infosystems.gopher:1519 alt.gopher:113
Newsgroups: comp.infosystems.gopher,alt.gopher
Path: feenix.metronet.com!news.utdallas.edu!tamsun.tamu.edu!cs.utexas.edu!zaphod.mps.ohio-state.edu!uwm.edu!ux1.cso.uiuc.edu!news.cso.uiuc.edu!ekatz
From: ekatz@ncsa.uiuc.edu (Eric Katz)
#Subject: GopherReport v.2.0
Date: Mon, 22 Mar 1993 17:14:40 GMT
Message-ID: <1993Mar22.171440.741@ncsa.uiuc.edu>
Distribution: usa
Sender: ekatz@ncsa.uiuc.edu (Eric Katz
Originator: ekatz@landrew.ncsa.uiuc.edu
Organization: Nat'l Ctr for Supercomp App (NCSA) @ University of Illinois
Lines: 536

Following is the new release of GopherReport version 2.0.
Thanks to all who have contributed comments and bug reports.

There are 2 important items to remember with this new release.
  1. Invoking with no arguments now displays a help page.
  2. This version corrects a bug that incorrectly calculated
     file/directory accesses.  I apologize for any inconvenience
     this may have caused.

See the revision history in the program text for a summary of features
and bug fixes.

<<<<< CUT HERE >>>>
#!/usr/local/bin/perl

# USAGE - print the command-line help page on standard output and exit.
# Takes no arguments and never returns to its caller.
sub USAGE {
    print <<'END_OF_USAGE';
USAGE: GopherReport -a|d|t|f|h|e [-s] [-D] [-vN] [-l logname] [keys]
    a       All
    d       Daily
    t       Time
    f       Files
    h       Host
    e       Errors
    s       Summary
    l       Name of the log file
    vN      Verbosity
    D       Current Day only

    keys    To limit the report to specific days, times, files, and/or
            hosts.  Any combination of keys may be specified.
            See examples below.
            Try to use the following conventions for the best results:
              Search for a date:    'nn'    ie: 10
              Search for a time:    'nn:'   ie: 10:
              Search for a Day:     'Xxx'   ie: Sun or Sunday
              Search for a file:    'xxx'   ie: software
                                 or 'xx/'       you may need a leading
                                 or '/xx'       or trailing '/'
              Search for a domain:  '.xxx'  ie: .edu or ncsa

END_OF_USAGE
    exit;
}

# Written by: Eric Katz
# email: ekatz@ncsa.uiuc.edu
# If you use this program regularly please send e-mail to the above
# address and I will try to provide you with updated versions as they
# come available.  I am also interested in knowing how often you rotate
# your gopher log and how long you keep old logs.
#########################################################################
# Permission is granted to anyone to use this software for any purpose on
# any computer, and to alter it for your personal use. Please do not
# distribute any alterations.
#
# 0. This software is provided "as is" and without any express or implied
#    warranties, including, without limitation, the implied warranties of
#    merchantability and fitness for a particular purpose.
#
# 1. The author is not responsible for the consequences of the use of this
#    software, no matter how deleterious, even if they arise from flaws in it.
#
# 2. The origin of this software must not be misrepresented, either by
#    explicit claim or by omission.
#
#
# 3. This software may not be sold.
#
# 4. This notice may not be removed or altered.
#
#########################################################################
# To install - Please remove everything above the #! line.
# Change the perl path to reflect the location of your perl installation.
# Then answer the following question.

# Where is your gopher log?
$GopherLOG = "/var/log/gopher_log";

######################################################################
# DO NOT CHANGE ANYTHING BELOW THIS LINE FOR DISTRIBUTION.
# You may make changes for your personal use but DO NOT DISTRIBUTE YOUR
# CHANGES.
#
# REVISION HISTORY
# V1.0 Released 2/9/93
# V1.1 Released 2/15/93
#     Bug Fix: File display now correctly displays file names with spaces
#     Bug Fix: Now handles file names with regexp in them.
#     Bug Fix: Illegal division by zero if no connections for the report
#              period fixed.
#     Bug Fix: Cleaned up output of large file names
#     Feature: Added number of files accessed.
#     Feature: Added varying level of verbosity to the output of file
#              accesses or host accesses.
#              In the case of file accesses the number represents the depth
#              of the directory tree that will be printed.
#              In other words -v1 will only report on root menu accesses,
#              while -v2 will go one directory deeper and so on.
#              In the case of host accesses, the number represents the
#              top percentile.
#              In other words, -v10 will show the top 10 percent of
#              connections.
#     Feature: Added -s, summary output.  This prints only the total
#              number of connections and files accessed.
#     Feature: Added search key capability.  Any arguments at the end of
#              the command line are assumed to be search key limiters.
#              It will try to 'do the right thing' but it is safer
#              to limit the type of search by using one of the -d, -t,
#              -f, or -h flags.
#
#
#     EXAMPLES:
#       GopherReport Thu Friday 10 11 14 15 Software
#                          # Will report on the number of people who
#                          # accessed the Software files on Thursday
#                          # and Friday during the 10am, 11am, 2pm,
#                          # and 3pm hours.
#                          # (As of V2.0 a report option such as -a is
#                          # required, and hour keys take a trailing
#                          # colon: -a Thu Friday 10: 11: 14: 15: Software)
#       GopherReport -f edu
#                          # Will report on file accesses that have
#                          # the edu substring in them.
#       GopherReport -h edu
#                          # Will report on hosts with edu in the name.
#       GopherReport -fh edu
#                          # Will report on both hosts and files with
#                          # edu in them.
# V2.0 Released 3/22/93
#
#     Bug Fix: Previous version incorrectly tallied bogus entries.
#     Bug Fix: Broke out directory accesses from file retrievals.
#     Feature: Added -l logname capability to examine logfiles other
#              than the default
#     Feature: Added -D to make it easier to examine the logfile for
#              today's information.
#     Feature: Added -e to scan for error messages
##########################################################################

# The Perl 4 libraries getopts.pl and ctime.pl are no longer shipped with
# Perl, so option parsing uses Getopt::Std (which sets the same $opt_X
# variables) and the current time comes from the localtime builtin.
use Getopt::Std;

# Get the current time and split it into fields.  In scalar context
# localtime returns a string such as "Mon Mar 22 15:42:22 1993".  Only the
# day of the month ($date) is needed below, for the -D option.
$DATE = localtime(time);
($day, $month, $date, $time, $year) = split (" ", $DATE);

# Parse the command line; an unrecognised option displays the help page.
getopts('adDtfhesl:v:') || &USAGE();

# If no report option is passed on the command line then display the help
# page.  -a counts as a report option (it is expanded to -d -h -t -f
# further down), so "GopherReport -a" must not fall through to the help.
&USAGE() unless ($opt_a || $opt_d || $opt_t || $opt_f
                 || $opt_e || $opt_h || $opt_s);

# If an alternate logfile has been requested
$GopherLOG = $opt_l if (defined $opt_l);

# Search keys are collected per category and joined into 'or' patterns
# afterwards.
@DayKeys   = ();
@DateKeys  = ();
@TimeKeys  = ();
@OtherKeys = ();

# If you want a printout for today only
push (@DateKeys, "\\b" . $date . "\\b") if ($opt_D);

# If there are no search key limiters at all.
if (!@ARGV && !$opt_D) {
    $NO_KEY = 1;
}
# Else sort every key into its category.
else {
    foreach $key (@ARGV) {
        if ($key =~ /(Mon|Tue|Wed|Thu|Fri|Sat|Sun)/) {
            # The log holds three-letter day names, so keep only the
            # abbreviation: "Friday" has to match a logged "Fri".
            push (@DayKeys, $1);
        }
        elsif ($key =~ /^(\d+):$/) {
            # 'nn:' is an hour.  Hours are compared as two digits.
            push (@TimeKeys, sprintf ("%02d", $1));
        }
        elsif ($key =~ /^\d+$/) {
            # A bare number is a day of the month.
            die "This does not make sense with the -D option\n" if ($opt_D);
            push (@DateKeys, "\\b" . $key . "\\b");
        }
        else {
            # Anything else is a file or host substring.  Escape it so
            # that regexp characters in the key are matched literally.
            push (@OtherKeys, quotemeta ($key));
        }
    }
}

# Build the 'or' pattern for each category, or flag the category as
# unrestricted when no key was given for it.
if (@DayKeys)   { $SearchDay  = join ("|", @DayKeys); }
else            { $NO_DAY_KEY = 1; }

if (@DateKeys)  { $SearchDate  = join ("|", @DateKeys); }
else            { $NO_DATE_KEY = 1; }

# The hour pattern is anchored so that the key "01" cannot also match
# hours such as 10 or 21.
if (@TimeKeys)  { $SearchTime  = "^(?:" . join ("|", @TimeKeys) . ")\$"; }
else            { $NO_TIME_KEY = 1; }

if (@OtherKeys) { $SearchOthers  = join ("|", @OtherKeys); }
else            { $NO_OTHERS_KEY = 1; }

# Three-argument open: a log name given with -l is always treated as a
# plain file name, never as an open mode or a command.
open (LOG, "<", $GopherLOG)
    || die "Couldn't open log file $GopherLOG: $!\n";

# Process the information in the log.
# First, split the line into 2 parts separated by the colon.  This tells
#     you who logged on and what they did.
# Second, split the 'who' variable into the different components.
# Third, split the 'what' variable into its different components.
# Fourth, If this is the first record in the log then set the start date
#     for the output to the date this entry was made.
# Fifth, set the array information and determine the maximum value for
#     each array.  The maximum value will be used to set the scale for
#     the graph.
#     If the entry is that of a connection then increment the number
#         of connections, the number of connections for that weekday,
#         the number of connections for that hour of the day, and the
#         number of connections for that host.
#     If the entry is that of an action vs. a connection then increment
#         the number of retrievals for that directory.
# Sixth, increment the record number.
# Read the log (the LOG handle is opened above) and tally every entry that
# passes the search-key filter.
#   $RECORD_NUMBER counts the entries that passed the filter (it starts
#   at 1), $ENTRIES_SEEN counts every well-formed line that was read.
$RECORD_NUMBER = 1;
$ENTRIES_SEEN  = 0;

while (<LOG>) {
    # "who : what" - skip anything that does not have both halves so that
    # a blank or truncated line cannot clobber the date fields.
    ($who, $what) = split (/ : /, $_, 2);
    next unless (defined $what);

    ($day, $mon, $date, $time, $year, $pid, $host) = split (/\s+/, $who);
    ($action, $file_type, $path) = split (/\s+/, $what, 3);
    # Remove the line terminator only; a final line without a newline
    # keeps its last character.
    chomp ($path);
    ($hour, $minutes, $seconds) = split (/:/, $time);
    $ENTRIES_SEEN ++;

    # Until the first entry has passed the filter, keep the start date on
    # the entry being looked at.
    if ($RECORD_NUMBER eq 1) {
        $start_date = "$mon $date, $year @ $time";
    }

    # This ridiculously long conditional just tries to do the right thing
    # with possible search key limiters: every category that has a key
    # must match, a category without a key matches anything.
    if (($NO_KEY)
        || (   ($NO_DAY_KEY  || ($day  =~ /$SearchDay/))
            && ($NO_DATE_KEY || ($date =~ /$SearchDate/))
            && ($NO_TIME_KEY || ($hour =~ /$SearchTime/))
            && ($NO_OTHERS_KEY
                || ($path =~ /$SearchOthers/)
                || ($host =~ /$SearchOthers/)))) {

        if ($file_type =~ /Connection/) {
            # A connection: count it overall, per weekday, per hour and
            # per host, remembering each maximum for scaling the graphs.
            $connections ++;
            $Connect{$day} ++;
            $DayMax = $Connect{$day} if ($Connect{$day} > $DayMax);
            $Connect{$hour} ++;
            $HourMax = $Connect{$hour} if ($Connect{$hour} > $HourMax);
            $Machines{$host} ++;
            $HostMax = $Machines{$host} if ($Machines{$host} > $HostMax);
        }
        elsif (($action =~ /retrieved/) && !($host =~ /^0\./)) {
            # A retrieval (hosts whose name starts with "0." are ignored).
            # Files and directories are totalled separately but share the
            # per-path table.
            if ($file_type =~ /file/) {
                $FILE_ACCESSES ++;
                $Directory{$path} ++;
                $FileMax = $Directory{$path} if ($Directory{$path} > $FileMax);
            }
            if ($file_type =~ /directory/) {
                $DIRECTORY_ACCESSES ++;
                $Directory{$path} ++;
                $FileMax = $Directory{$path} if ($Directory{$path} > $FileMax);
            }
        }
        elsif ($path =~ /not exist!!/) {
            # "<name> does not exist!!" - the name is in $action, and also
            # in $file_type when the name itself contained a space.
            $EXIST_ERRORS_PRINT .= "\n$mon $date $hour:$minutes $action";
            $EXIST_ERRORS_PRINT .= " $file_type" if ($file_type !~ /does/);
            $EXIST_ERRORS ++;
        }
        elsif ($file_type =~ /directory/) {
            # Any other message about a directory is counted as an
            # access error.
            $ACCESS_ERRORS_PRINT .= "$mon $date $hour:$minutes $path\n";
            $ACCESS_ERRORS ++;
        }

        $RECORD_NUMBER ++;
    }
}
close LOG;

# Set the end date variable to that of the last entry in the log.  This
# has to happen before the error report below, which prints it.
if ($ENTRIES_SEEN) {
    $end_date = "$mon $date, $year @ $time";
}
else {
    $start_date = "(no log entries)";
    $end_date   = "(no log entries)";
}

if ($opt_e) {
    print "==================================================================\n";
    print " Gopher Error History\n";
    print " Beginning $start_date\n";
    print " Ending $end_date\n";
    print "==================================================================\n";
    print "\n";
    print "NON-EXISTING FILES: $EXIST_ERRORS Occurences\nDATE TIME FILE\n____________________________________________\n";
    if ($EXIST_ERRORS) {
        print $EXIST_ERRORS_PRINT;
    }
    print "\n\nNON-ACCESSIBLE DIRECTORIES: $ACCESS_ERRORS Occurences\nDATE TIME FILE\n____________________________________________\n";
    if ($ACCESS_ERRORS) {
        print $ACCESS_ERRORS_PRINT;
    }
}

# -a is shorthand for all four report sections.
if ($opt_a) {
    $opt_d = 1;
    $opt_h = 1;
    $opt_t = 1;
    $opt_f = 1;
}

# This number represents either the number of subdirectory levels to report
# or the top x percentage of host connections depending on the context.
if (defined $opt_v) {
    $verbosity_limit = $opt_v;
}
else {
    $verbosity_limit = 100;
}

# Set an array to convert 3 letter weekday abbreviations to full names.
@weekdays = ('Sunday','Monday','Tuesday','Wednesday','Thursday','Friday','Saturday');

# Sort order for the host report: most connections first, host name as the
# tie-breaker so that the output is the same from run to run.
sub by_connections {
    $Machines{$b} <=> $Machines{$a} || $a cmp $b;
}

# Print the header
print "\n\n";
print "==================================================================\n";
print " Gopher Usage Report\n";
print " Beginning $start_date\n";
print " Ending $end_date\n";
print "==================================================================\n";
print "\n";

# The totals are printed in fixed-width columns so that the numbers line
# up under "Successful" and "Failed"; "+ 0" shows a counter that was never
# incremented as 0 instead of an empty field.
printf ("%-38s%12s %9s\n", "", "Successful", "Failed");
printf ("%-38s%12s %9s\n", "", "------------", "---------");
printf ("%-38s%12s\n", "Total Number of Connections:", $connections + 0);
printf ("%-38s%12s %9s\n", "Total Number of Files Retrieved:",
        $FILE_ACCESSES + 0, $EXIST_ERRORS + 0);
printf ("%-38s%12s %9s\n", "Total Number of Directories Accessed:",
        $DIRECTORY_ACCESSES + 0, $ACCESS_ERRORS + 0);
print "\n";

# -s asks for the summary only.
exit if ($opt_s);

if ($opt_d) {
    $SubTotal = 0;

    # Print the connections/weekday data
    print "\n";
    print "Connections Per Week Day\n";
    printf ("%-8s%-30s\n%s\n"," Day","Connections","---------------------------------------------------------------------");

    # Print the output for each record.
    foreach $heading (@weekdays) {
        # Convert the heading for each day (long form) into the short form
        # for accessing the array information.
        $index = substr ($heading, 0, 3);
        $count = $Connect{$index} + 0;

        # Calculate the number of stars to print (50 for the busiest day).
        if ($DayMax > 0) {
            $graph = '*' x ($count / $DayMax * 50);
        }
        else {
            $graph = "";
        }

        # Print them, unless a day key excludes this day.
        if ($NO_DAY_KEY || ($index =~ /$SearchDay/)) {
            printf ("%-13s%3s %s\n", $heading, $count, $graph);
            $SubTotal = $SubTotal + $count;
        }
    }
    print "\n";
    print "Total for this Section: $SubTotal\n\n" if !($NO_KEY);
}

if ($opt_t) {
    $SubTotal = 0;

    # Iterate through the 'Hours' array to print the hourly information.
    # The first 10 numbers are quoted to preserve the leading 0.
    @Hours = ('00','01','02','03','04','05','06','07','08','09',10 .. 23);

    print "\n";
    print "Connections Per Hour\n";
    printf ("%-8s%-30s\n%s\n"," Hour","Connections","---------------------------------------------------------------------");

    foreach $heading (@Hours) {
        $count = $Connect{$heading} + 0;

        if ($HourMax > 0) {
            $graph = '*' x ($count / $HourMax * 50);
        }
        else {
            $graph = "";
        }

        if ($NO_TIME_KEY || ($heading =~ /$SearchTime/)) {
            $SubTotal = $SubTotal + $count;

            # Change the heading to read 'Midnight' or 'Noon' if
            # appropriate, else just print the hour by number.
            if    ($heading == 0)  { $label = "Midnight"; }
            elsif ($heading == 12) { $label = "Noon"; }
            else                   { $label = $heading; }

            printf ("%-12s%4s %s\n", $label, $count, $graph);
        }
    }
    print "\n";
    print "Total for this Section: $SubTotal\n\n" if !($NO_TIME_KEY);
}

if ($opt_f) {
    $SubTotal = 0;
    print "\n";
    print "Access Report for Directories and Files\n\n";
    printf ("%s\n%s\n","Directory or File Accessed","-------------------------------------------------------------------------");

    # Walk the full pathnames in sorted order, so that entries of the same
    # directory follow each other.
    @previous_heading = ();
    foreach $heading (sort (keys (%Directory))) {

        # Calculate the number of stars for each entry (I add 1 just so
        # that each line gets at least 1 star for visual reasons.  This
        # actually sets the scale off a bit but it is just meant to be a
        # quick reference anyway.  If you want more accurate graphing
        # remove the '+ 1' in the following statement.)
        if ($FileMax > 0) {
            $graph = '*' x (($Directory{$heading} / $FileMax * 30) + 1);
        }
        else {
            $graph = "";
        }

        # StringLength is used in order to split the filename into
        # multiple lines if it exceeds 30 characters.
        $StringLength = 0;

        # Create an array consisting of the names in the full path without
        # the '/'
        @heading_format = split (/\//, $heading);

        $PRINT_ME = ($NO_OTHERS_KEY || ($heading =~ /$SearchOthers/));

        # Iterate through this list of names that defines the path.  For
        # each leading directory that is shared with the previously seen
        # path print a '.' instead of the name, to prevent the duplication
        # of full pathnames in the output.  The abbreviation stops at the
        # first directory that differs, so that equally named directories
        # under different parents are still spelled out.
        $SamePrefix = 1;
        for ($i = 1; $i < $#heading_format; $i++) {
            $SamePrefix = 0 if ($heading_format[$i] ne $previous_heading[$i]);
            if ($SamePrefix) {
                $heading_format[$i] = ".";
            }
            else {
                $heading_format[$i] = $heading_format[$i] . "/";
            }
            $StringLength = $StringLength + length ($heading_format[$i]);
            if ($StringLength > 30) {
                # Continue on a new line, indented by three columns.
                $heading_format[$i] = $heading_format[$i] . "\n   ";
                $StringLength = 3;
            }
        }

        # Pad the last name so that the name column is 37 wide.  printf
        # doesn't handle the multiple line filenames very well.
        $Spaces = " " x (37 - ($StringLength + length ($heading_format[$i])));
        $heading_format[$i] = "$heading_format[$i]$Spaces";

        # If this is the root menu then label it as such, padded to the
        # same column width as every other entry.
        if ($heading eq "/") {
            @heading_format = (sprintf ("%-37s", "Main_Menu"));
        }

        # Print it.  $i is the depth of the entry in the directory tree.
        if (($i <= $verbosity_limit) && $PRINT_ME) {
            $SubTotal = $SubTotal + $Directory{$heading};
            printf ("%s%5s %s\n", join ("", @heading_format),
                    $Directory{$heading}, $graph);
        }

        # Remember this path for the abbreviation of the next one.
        @previous_heading = split (/\//, $heading);
    }
    print "\n";
    print "Total for this Section: $SubTotal\n\n" if !($NO_KEY);
}

if ($opt_h) {
    $SubTotal = 0;

    # Print the name of each host and the number of times they connected,
    # busiest hosts first.
    print "Connections per Host\n";
    @sortedHOSTS = sort by_connections keys (%Machines);

    # The header uses the same 50 column host field as the rows below.
    printf ("%-50s%s\n%s\n","Host","Number of Connections","---------------------------------------------------------------------");

    foreach $heading (@sortedHOSTS) {
        if ($NO_OTHERS_KEY || ($heading =~ /$SearchOthers/)) {
            # Stop listing once the hosts printed so far account for the
            # requested percentage of all connections.
            if ($SubTotal < ($connections * $verbosity_limit / 100)) {
                $SubTotal = $SubTotal + $Machines{$heading};
                printf ("%-50s%s\n", $heading, $Machines{$heading});
            }
        }
    }
    print "\n";
    print "Total for this Section: $SubTotal\n\n" if !($NO_KEY);
}

# Nothing after this marker is read by perl, so the article's trailing
# signature can stay in the file.
__END__
--
===============================================
Eric Katz (ekatz@ncsa.uiuc.edu)
Computer Utilization Analyst