#!/usr/local/bin/perl -w

#
# unique -- part of hypermh, a front end to hypermail for creating
# Web archives of large MH-format mailboxes.  For more information, see
# http://www.circlemud.org/~jelson/software/hypermh.html
#
# To use, make sure hypermail is installed (for more information about
# hypermail, see http://www.eit.com/software/hypermail/hypermail.html).
# Then run unique, naming every file that is a candidate for the archive
# on the command line (e.g.: unique ~/Mail/my-maillist/*)
#
# Jeremy Elson, jelson@circlemud.org
# March 4, 1997
#

# Name of the archive.  It is combined with the month name and the year to
# build the label handed to hypermail (-l) for each monthly archive, so
# change it to suit the mailing list being archived.
$archive_name = "CircleMUD";

##########################################################################

# Date::Manip is loaded at run time and nothing is imported from it, so its
# functions are always called fully qualified (Date::Manip::ParseDate,
# Date::Manip::UnixDate).
require Date::Manip;

# Month names indexed by month number (1-12).  Slot 0 is deliberately left
# unset so that the two-digit month cut out of a parsed date can be used
# directly as the index.
@month[1 .. 12] = qw(
  January February March     April   May      June
  July    August   September October November December
);


# Running count of messages for which a Message-Id had to be invented
$noid = 0;


#open (DUPES, ">dupes");

#
# parse(@lines) -- pull the Message-Id and Date out of a message header.
#
# Arguments: the lines of one mail message, in order (header first).
#
# Returns:   the two-element list ($id, $date).
#            $id   is the text of the first non-empty Message-Id header with
#                  surrounding whitespace removed; if there is none, an
#                  artificial id is made up and the global $noid is bumped.
#            $date is the text of the first non-empty Date header with any
#                  parenthesised comments and surrounding whitespace
#                  removed, or the literal "message with no date".
#
# Only the header is examined: scanning stops at the first empty or
# whitespace-only line so that text in the body which merely looks like a
# header is never picked up.
#
sub parse {
  my $id = "";
  my $date = "";

  foreach my $line (@_) {
    # stop at the blank line that separates the header from the body
    last if ($line =~ /^\s*$/);

    # try to find the date and message id; tolerate any amount of
    # whitespace after the colon and drop trailing blanks / carriage
    # returns so the same id always yields the same string
    if ($id eq "" && $line =~ /^Message-Id:\s*(.*?)\s*$/i) {
      $id = $1;
    } elsif ($date eq "" && $line =~ /^Date:\s*(.*?)\s*$/i) {
      $date = $1;
      # remove each "(comment)" on its own, keeping what lies between them
      $date =~ s/\([^()]*\)//g;
      $date =~ s/\s+$//;
    }

    # nothing more to look for once we have both
    last if ($id ne "" && $date ne "");
  }

  # no date available - make one up
  if ($date eq "") {
    $date = "message with no date";
  }

  # we didn't find a message-id - just make one up
  if ($id eq "") {
    $id = "<artificial-message-id-$noid\@cambot>";
    $noid++;
  }

  return ($id, $date);
}

#print "Deleting old files...\n";
#system "rm -f final-list/*";

# Pass 1: read every file named on the command line and remember, for each
# distinct Message-Id, the parsed date (%ids) and the first file that carried
# it (%filenames).  Later files with an id already seen count as duplicates.
print "Building database...\n";
$unique_ids = $duplicates = $total = 0;

my $last_good_date;   # most recent date Date::Manip was able to parse
my @pending;          # ids seen before any parsable date turned up

# defined() so that a file literally named "0" does not end the loop
while (defined(my $file = shift)) {
  # a shell glob over a mail folder can easily include sub-folders
  if (-d $file) {
    warn "Skipping $file: is a directory\n";
    next;
  }

  my $fh;
  unless (open($fh, '<', $file)) {
    warn "Skipping $file: $!\n";
    next;
  }
  my @text = <$fh>;
  close $fh;
  $total++;

  my ($id, $date) = parse(@text);

  if (exists $ids{$id}) {
    $duplicates++;
#    print DUPES "diff $file $filenames{$id}\n";
#    print "Not saving duplicate id $id\n";
    next;
  }

  $unique_ids++;
  $filenames{$id} = $file;

  my $parsed = Date::Manip::ParseDate($date);
  if ($parsed) {
    $ids{$id} = $last_good_date = $parsed;

    # messages that came before the first usable date inherit this one
    if (@pending) {
      @ids{@pending} = ($parsed) x @pending;
      @pending = ();
    }
  } else {
    print "Unable to resolve $date ($file)\n";
    if (defined $last_good_date) {
      # file it next to the message that preceded it
      $ids{$id} = $last_good_date;
    } else {
      # no usable date yet; keep the id registered and fix it up later
      $ids{$id} = undef;
      push @pending, $id;
    }
  }
#    print "Saving id=$id,  date=$ids{$id}, strdate=$date\n";
}

# Not a single message had a usable date: file the leftovers under the
# current date so that every entry of %ids is a real date string.
if (@pending) {
  my $fallback = Date::Manip::ParseDate("now");
  @ids{@pending} = ($fallback) x @pending;
  @pending = ();
}

print "$total files processed: $unique_ids unique message ID's, $duplicates duplicates, ";
print "$noid without a message id\n";

# Pass 2: walk the unique messages in date order and pipe them, one calendar
# month at a time, through "tee YYYY-MM.txt | hypermail ...".  Each message
# is preceded by a synthetic "From " line carrying its date.
#
# NOTE(review): body lines that themselves begin with "From " are passed
# through unescaped; confirm that hypermail's mailbox reader copes with that.
print "Feeding to hypermail...\n";

my $hypermail;          # write end of the pipeline for the month in progress
my $open_period = "";   # "YYYY-MM" that pipeline belongs to

# Finish the pipeline in progress, if any, and report a failing exit status.
my $finish_period = sub {
   return unless defined $hypermail;
   close($hypermail)
      or warn "hypermail pipeline for $open_period failed: "
            . ($! ? $! : "exit status $?") . "\n";
   undef $hypermail;
};

# Entries without a date cannot be sorted or assigned to a month.
my @dateless = grep { !defined $ids{$_} } keys %ids;
warn scalar(@dateless) . " message(s) without a usable date left out\n"
   if @dateless;

foreach my $key (sort {$ids{$a} cmp $ids{$b}} grep { defined $ids{$_} } keys %ids) {
   my $curr_date = $ids{$key};
   my $curr_year = substr($curr_date, 0, 4);
   my $curr_month = substr($curr_date, 4, 2);
   my $fname = sprintf "%s-%s", $curr_year, $curr_month;

   # read the message before touching the pipeline so that an unreadable
   # file does not leave a bare "From " line in the archive
   my $in;
   unless (open($in, '<', $filenames{$key})) {
      warn "Unable to open $filenames{$key}: $!\n";
      next;
   }
   my @text = <$in>;
   close $in;

   if ($fname ne $open_period) {
      $finish_period->();
      $open_period = $fname;

      # the archive name is passed as data, never as part of the format
      my $aname = sprintf "%s -- %s %s",
	$archive_name, $month[$curr_month], $curr_year;
      my $cmd = sprintf "tee %s.txt | hypermail -p -i -l \"%s\" -d %s -a ../index.html",
	$fname, $aname, $fname;
      print "\n|$cmd\n";
      open($hypermail, '|-', $cmd)
	 or die "Unable to start hypermail pipeline for $fname: $!\n";
   }

   my $hdate = Date::Manip::UnixDate($curr_date, "%a %b %e %T %Y");
   print {$hypermail} "From someone\@somewhere    $hdate\n";
   print {$hypermail} @text;
   print {$hypermail} "\n\n\n\n";
}

$finish_period->();
print "Done.\n";
