#!/usr/local/bin/perl

#
# Improved statistics generation for NcFTPd Log files
#
# Usage: ncstat <logfile-name>
#
# Jeremy Elson, jelson@circlemud.org
# April 14, 1997
#

# Maximum number of rows printed in each "Top N" table of the report
# (hosts by bytes, hosts by files, and files by number of accesses).
$top_hosts = 40;

###########################################################################

# Unit labels indexed by the number of times a byte count has been divided
# by 1000.  Index 0 is never looked up: normalize() prints "bytes" itself.
@units = ("bytes", "KB", "MB", "GB", "TB", "PB");

# normalize(NUM)
#
# Format a byte count as a short human-readable string.
#
#   NUM      a non-negative number of bytes
#   returns  "<n> bytes" (integer) when NUM is below 1000, otherwise the
#            value scaled by powers of 1000 with two decimals and the
#            matching label from @units, e.g. "1.50 MB"
#
# The scale index is lexical so a call cannot clobber an unrelated global
# $unit, and scaling stops at the largest known unit so that enormous
# values are reported as a large number of PB instead of receiving an
# empty unit label.
sub normalize {
   my $num = shift @_;

   my $unit = 0;
   while ($num >= 1000.0 && $unit < $#units) {
     $num /= 1000.0;
     $unit++;
   }

   if ($unit == 0) {
     return sprintf "%ld bytes", $num;
   } else {
     return sprintf "%.2f %s", $num, $units[$unit];
   }
}

# Print the horizontal rule that separates report sections: a newline, a
# row of dashes, and then a blank line.
sub line {
 my $rule = "\n------------------------------------------------------------------\n\n";
 print $rule;
}


# Month abbreviations indexed by month number (1..12); index 0 is unused.
@months[1..12] = ("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug",
		"Sep", "Oct", "Nov", "Dec");

# date(DAYKEY)
#
# Turn a day key (the year, two-digit month and two-digit day digits
# concatenated, e.g. "970414") into the text "(on 14 Apr 97)".
#
#   DAYKEY   string of digits: year (two or more digits), then MM, then DD
#   returns  "(on DD Mon YEAR)", or "(on unknown date)" when DAYKEY is
#            undefined, is not of that shape, or has a month outside 1..12
#
# The key is taken apart from the right-hand end so that the year may be
# of any width; a fixed-offset split only works for two-digit years.
sub date {
  my $d = shift @_;

  if (defined($d) && $d =~ /\A(\d{2,})(\d\d)(\d\d)\z/) {
    my ($year, $month, $day) = ($1, $2, $3);
    my $month_name = $months[$month];
    if (defined($month_name)) {
      return sprintf "(on %s %s %s)", $day, $month_name, $year;
    }
  }

  return "(on unknown date)";
}

############################################################################

# Beginning of main

# Running totals over the whole log.
($total_files, $total_hosts, $total_bytes, $total_days) = (0, 0, 0, 0);

# Day key of the most recently counted record; "none" until the first one.
$last_day = "none";

# Values of $total_files / $total_bytes at the previous day rollover;
# update_day_counters subtracts them to get the per-day figures.
($daily_files, $daily_bytes) = (0, 0);

# Largest per-day file and byte counts seen so far.
($max_daily_files, $max_daily_bytes) = (0, 0);

# Numerator and denominator of the size-weighted average transfer speed.
($total_kbseck_product, $total_kbseck_bytes) = (0, 0);

#### PARSING AND COUNTING ##################################################

# update_day_counters()
#
# Close out the day named by $last_day.  Bumps $total_days, works out how
# many files and bytes were added to the running totals since the previous
# call, stores the current totals as the new baseline, and records
# $last_day as the busiest day for files and/or bytes when it beats the
# existing maximum.  Comparisons are strict, so on a tie the earlier day
# keeps the record.  Takes no arguments; works entirely on globals.
sub update_day_counters {
  ++$total_days;

  # Activity since the previous rollover.
  ($todays_files, $todays_bytes) =
      ($total_files - $daily_files, $total_bytes - $daily_bytes);

  # The current totals become the baseline for the next day.
  ($daily_files, $daily_bytes) = ($total_files, $total_bytes);

  ($max_daily_files, $max_daily_files_day) = ($todays_files, $last_day)
      if ($todays_files > $max_daily_files);

  if ($todays_bytes > $max_daily_bytes) {
    $max_daily_bytes = $todays_bytes;
    $max_daily_bytes_day = $last_day;
  }
}


# Main parsing loop: read every log line from the files named on the
# command line (or standard input) and accumulate the statistics.
#
# As implied by the splits below, a line has a date/time part and a
# transfer part separated by "|".  The date/time fields are separated by
# spaces, "/" or ":", and the transfer part is a comma-separated record
# whose first eight fields are: operation, file name, size, duration,
# speed, login, e-mail and host.
while (<>) {
  my ($date, $transfer) = split(/\|/);
  my ($year, $month, $day) = split(/[ \/:]/, $date);
  my ($retr, $filename, $filesize, $duration, $kbsec, $login, $email, $host,
	$suffix) = split(',', $transfer);

  # skip badly formatted lines
  next if (!defined($host));

  # we don't want non-anonymous logs
  next if $login ne "anonymous";

  # we only want retrievals (not stores)
  next if $retr ne " R";

  # Handle a change of day BEFORE this record is added to the totals.
  # update_day_counters credits everything counted since the last rollover
  # to $last_day, so counting first would attribute the first transfer of
  # each new day to the previous day.
  my $today = $year . $month . $day;
  if ($last_day ne $today) {
    update_day_counters() if ($last_day ne "none");
    $last_day = $today;
  }

  $total_files++;
  $total_bytes += $filesize;

  # Per-host byte and file counts; a host is new the first time it is seen.
  $total_hosts++ if (!defined($bytes_per_host{$host}));
  $bytes_per_host{$host} += $filesize;
  $files_per_host{$host}++;

  # All README and Index files are lumped together under one pseudo-name.
  if ($filename =~ /README/ || $filename =~ /Index/) {
    $filename = "pub/Index/Informational Files";
  }
  $file_hits{$filename}++;

  # Size-weighted speed average: only files over 30000 bytes with a
  # reported speed of at most 1000 contribute (presumably to discard
  # tiny transfers and implausible speed figures).
  if ($filesize > 30000 && $kbsec <= 1000) {
    $total_kbseck_product += ($filesize / 1024.0) * $kbsec;
    $total_kbseck_bytes += $filesize / 1024.0;
  }
}

# Close out the final day of the log.
update_day_counters();


#### REPORTING ############################################################

# Summary section of the report: period length, host count, byte and file
# totals with per-day maxima and averages, and the average transfer speed.
#
# The "(on ...)" suffix is only printed when a busiest day was actually
# recorded, and every division is guarded so that a log with no usable
# records still produces the rest of the report instead of dying with
# "Illegal division by zero".
printf "%-50s %6d\n", "Number of Days in Summary Period", $total_days;
printf "%-50s %6d\n", "Number of Hosts Accessing Archive", $total_hosts;

print "\n";

printf "%-50s %12s\n", "Total Bytes Transmitted During Summary Period",
        normalize($total_bytes);
printf "%-50s %12s %s\n", "Maximum Bytes Transmitted in a Single Day",
        normalize($max_daily_bytes),
        (defined($max_daily_bytes_day) ? date($max_daily_bytes_day) : "");
printf "%-50s %12s\n", "Average Bytes Transmitted Per Day",
        normalize($total_days ? $total_bytes / $total_days : 0);

print "\n";

printf "%-50s %6d\n", "Total Files Transmitted During Summary Period",
        $total_files;
printf "%-50s %6d %s\n", "Maximum Files Transmitted in a Single Day",
        $max_daily_files,
        (defined($max_daily_files_day) ? date($max_daily_files_day) : "");
printf "%-50s %9.2f\n", "Average Files Transmitted Per Day",
        ($total_days ? $total_files / $total_days : 0);

print "\n";

# The speed average is weighted by transfer size; it is undefined when no
# transfer qualified for it, so report "n/a" in that case.
if ($total_kbseck_bytes > 0) {
  printf "%-50s %9.2f\n", "Average Transfer Speed, Files >30Kb (Kb/sec)",
          $total_kbseck_product / $total_kbseck_bytes;
} else {
  printf "%-50s %9s\n", "Average Transfer Speed, Files >30Kb (Kb/sec)",
          "n/a";
}

line();

# print_top_hosts(CRITERION, RANK_BY)
#
# Print one "Top N hosts" table.
#
#   CRITERION  word used in the heading ("bytes" or "files")
#   RANK_BY    reference to the per-host hash whose values give the ranking
#
# At most $top_hosts rows are printed, highest value first.  Hosts with
# equal values are ordered by name so that the report is the same from one
# run to the next.  Each row shows the host, its total bytes, its file
# count and its average file size.
sub print_top_hosts {
  my ($criterion, $rank_by) = @_;

  printf "\nTop %d hosts by number of %s transmitted:\n\n", $top_hosts,
	$criterion;

  printf "%-30s %10s %7s %10s\n", "Host", "Bytes", "Files", "Avg. Size";

  my @ranked = sort { $rank_by->{$b} <=> $rank_by->{$a} or $a cmp $b }
		keys %$rank_by;

  my $shown = 0;
  foreach my $host (@ranked) {
    last if ($shown++ == $top_hosts);
    printf "%-30s %10s %7s %10s\n", $host,
	normalize($bytes_per_host{$host}),
	$files_per_host{$host},
	normalize($bytes_per_host{$host} / $files_per_host{$host});
  }
}

print_top_hosts("bytes", \%bytes_per_host);

line();

print_top_hosts("files", \%files_per_host);

line();

# "Top N files" table: at most $top_hosts rows, most-accessed file first,
# with files of equal popularity ordered by name so the output is stable.
printf "\nTop %d files by number of accesses:\n\n", $top_hosts;

printf "%10s %s\n", "Hits", "File";

my $files_shown = 0;
foreach my $file (sort {$file_hits{$b} <=> $file_hits{$a} or $a cmp $b}
		keys %file_hits) {
   last if ($files_shown++ == $top_hosts);

   # Show the path relative to "pub/".  Only use the capture when the
   # match succeeded: after a failed match $1 still holds the text from
   # the previous row, so names without "pub/" are printed in full.
   my $label = ($file =~ /pub\/(.*)/) ? $1 : $file;
   printf "%10d %s\n", $file_hits{$file}, $label;
}

