#!/usr/local/bin/perl -w

use strict;

## hm_compare
##
## Given two hypermail archives built from the same RFC822 mboxes, 
## compares the metadata in all the nnnn.html files.
## The goal of the script is to test the backwards compatibility between
## hm 2.b and hm1.2

## Author: Jose Kahan (W3C)
## History:
## 18/Nov/1999 : Written from scratch (JK)
##

## configuration

my $list_dir = "/home/lists";

## global variables

my $newhm_file;
my $oldhm_file;


## read_period_dirs
##
## Reads the catalog file at $path and returns the list of period
## directory names, taken from the fourth colon-separated field of each
## line. Lines starting with '#' are treated as comments and lines that
## have no fourth field are skipped.
## Dies if the catalog cannot be opened.
sub read_period_dirs {
    my ($path) = @_;
    my @periods;

    # An unreadable catalog must be fatal: silently returning an empty
    # list would make two unreadable archives look identical.
    open (my $in, '<', $path) or die "Can't open $path: $!\n";

    while (my $line = <$in>) {
        next if $line =~ /^\#/;

        # drop the line terminator so it can never end up in a dir name
        chomp $line;

        my $entry = (split (/:/, $line))[3];
        next unless defined $entry && $entry ne "";

        push (@periods, $entry);
    }

    close ($in);

    return @periods;
}

## make_filename
## Builds the name of a hypermail message file: "$path/NNNN.html",
## where NNNN is $index zero-padded to at least four digits.

sub make_filename {
    my ($path, $index) = @_;

    # $path is passed as an argument rather than interpolated into the
    # format, so a '%' in a directory name is not parsed as a directive.
    return sprintf ("%s/%.4d.html", $path, $index);
} # make_filename

## get_metadata
## Retrieves specific metadata from a hypermail-generated HTML file.
##
## Scans $file for lines starting with "<!-- KEY=" for the keys
## received, sent, name, email, subject and id. Returns the list
## ($received, $sent, $name, $email, $subject, $id), where each element
## is the complete matching line (line terminator included) or "" when
## the key was not found. If a key appears more than once, the last
## occurrence wins.
## Dies if the file cannot be opened or if it has no id line.

sub get_metadata {
    my ($file) = @_;

    my @keys = qw(received sent name email subject id);
    my %md   = map { $_ => "" } @keys;

    # Three-arg open with a lexical handle: the name is taken literally
    # (no mode characters, no whitespace stripping) and no global
    # filehandle is shared with other subs.
    open (my $in, '<', $file) or die "Can't open $file: $!";

    while (my $line = <$in>) {
        if ($line =~ /^<!-- (received|sent|name|email|subject|id)=/) {
            $md{$1} = $line;
        }
    }

    close ($in);

    if ($md{id} eq "") {
        die ("$file didn't have an id\n");
    }

    return @md{@keys};

} ## get_metadata

## compare_metadata
## Compares two metadata values of kind $type. When they differ, reports
## the two files currently being compared ($newhm_file / $oldhm_file) and
## both values, then asks on STDIN whether to continue.
## An empty answer (the advertised default), "y" or "yes" continues;
## anything else, including end-of-file on STDIN, dies.

sub compare_metadata {
    my ($type, $new_md, $old_md) = @_;

    ## case normalization is currently disabled:
    ##    $new_md =~ tr/A-Z/a-z/;
    ##    $old_md =~ tr/A-Z/a-z/;

    return if $new_md eq $old_md;

    print ("Files\n\t$newhm_file\nand\n\t$oldhm_file\n differ\n");
    print ("metadata type: $type\n");
    print ("new: $new_md\n");
    print ("old: $old_md\n");

    print "Continue? (y/n) [y]";
    my $answer = <STDIN>;
    print "\n";

    # EOF (e.g. a non-interactive run) is not a confirmation.
    $answer = "n" unless defined $answer;
    $answer =~ s/^\s+//;
    $answer =~ s/\s+$//;

    # The prompt offers y/n with y as default, so accept both the empty
    # answer and an explicit yes.
    unless ($answer eq "" || $answer =~ /^y(?:es)?$/i) {
        die ("Error: archives differ\n");
    }

} ## compare_metadata

## compare_files
## Walks the message files 0000.html, 0001.html, ... in $path_new and
## $path_old in parallel and compares their sent, email and id metadata.
## Stops at the first index for which neither directory has a file, and
## dies if only one of the two directories has a file for an index.
## Sets the globals $newhm_file and $oldhm_file, which compare_metadata
## uses in its report. Prints the number of file pairs compared.

sub compare_files {
    my ($path_new, $path_old) = @_;

    my $index = 0;

    while (1) {
        $newhm_file = make_filename ($path_new, $index);
        $oldhm_file = make_filename ($path_old, $index);

        my $have_new = -e $newhm_file;
        my $have_old = -e $oldhm_file;

        # continue until there are no more files
        last if !$have_new && !$have_old;

        if (!$have_new) {
            die "Error: $newhm_file doesn't exist and $oldhm_file does\n";
        }
        if (!$have_old) {
            die "Error: $oldhm_file doesn't exist and $newhm_file does\n";
        }

        my ($new_received, $new_sent, $new_name, $new_email, $new_subject,
            $new_id) = get_metadata ($newhm_file);

        my ($old_received, $old_sent, $old_name, $old_email, $old_subject,
            $old_id) = get_metadata ($oldhm_file);

        ## removed because the format of these two md changes
        ##    compare_metadata ("received", $new_received, $old_received);
        ##    compare_metadata ("subject", $new_subject, $old_subject);

        compare_metadata ("sent", $new_sent, $old_sent);
        ##    compare_metadata ("name", $new_name, $old_name);
        compare_metadata ("email", $new_email, $old_email);
        compare_metadata ("id", $new_id, $old_id);

        $index++;
    }

    # $index is the first index with no file, i.e. the number of pairs
    # that were actually compared (0 .. $index-1).
    print "Compared ", $index, " files\n";

} ## compare_files

## main
##
## usage: hm_compare new_hypermail_list old_hypermail_list
## Both arguments are list names relative to $list_dir. The period
## directories of each list are read from its ArchiveStatus/catalog file
## and compared pairwise, in catalog order.

{
    my ($list_new, $list_old) = @ARGV;

    if (! defined ($list_new) || ! defined ($list_old)) {
        die "usage: hm_compare new_hypermail_list old_hypermail_list\n";
    }

    $list_new = "$list_dir/$list_new";
    $list_old = "$list_dir/$list_old";

    my @periods_new = read_period_dirs ("$list_new/ArchiveStatus/catalog");
    my @periods_old = read_period_dirs ("$list_old/ArchiveStatus/catalog");

    if (scalar (@periods_new) != scalar (@periods_old)) {
        die ("period dirs are not the same\n");
    }

    # Iterate up to and including the last index, so the final period
    # (or the only one) is compared as well.
    for my $i (0 .. $#periods_new) {
        my $path_new = "$list_new/Archive/$periods_new[$i]";
        my $path_old = "$list_old/Archive/$periods_old[$i]";

        print "Comparing\n\t$path_new against\n\t$path_old\n";
        compare_files ($path_new, $path_old);
    }

    print "--> Archives seem to be the same\n";

} ## main









