#!/usr/local/bin/perl5 -w
######################################
# Script for regenerating the W3C
# HTML archives for its mailing lists
# Author: Jose Kahan <kahan@w3.org> 
######################################
# History:
# 6/Mar/97: The script is born
# 7/Mar/97: Added support for arguments
# 20/Dec/99: Changed the way we get the filenames
#            from an MH directory (the other one
#            was broken when the directory contained other filenames
#            that weren't digits exclusively.          
######################################
#
#USAGE: update-period options
#
# OPTIONS:
#
# -s source dir where the mails
#    are stored  
# -f file name of the first mail to process
# -l file name of the last  mail to process
#
# -t target dir for the html 
#
# -n name of the HTML archive
#   (e.g., www-html@w3.org AprJun96)
#
######################################

#use strict;

############################# 
# Pre-initializationon      #
#############################

BEGIN {
    require 5.005;
    use lib '/home/lists/.bin/';

    ###############################
    # Global custom configuration #
    ###############################
    use "hm_config.dat";
}

#############################
# Global variables          #
#############################

my $list_name = "";  # the list we're upgrading 
my $period = "";     # number of months per period
my $first_msg = -1;  # first mail to process
my $last_msg =  -1;  # last mail to process
my $overwrite = 0;   # overwrite an existing hmail archive
my $progress = 1;    # print progress report to stdout

my $verbose_name = "";    # the desc of the list:  " www-html@w3.org JanFeb96"
my $archive_name = "";    # the name of the archive (used in the mailtos)

my $mail_dir = "";   # the mh mailbox dir
my @fileNames;       # the mbox filenames we will convert (from the mh dir)

my $period_dir = ""; # the target directory
my $catmail = "";    # temporary file where we're concatenating the mh files
# the css URLS
my $messagelist_css = "";
my $message_css = "";

##############################
# Main Program               #
##############################

{
## Configure the script

    &Configure;

#
# Read the Smartlist file names
#

    &ReadSmartListArchive;

#
# Regenerate the files
#

# First, concatenate all the mails together

    &concatenateMails;

# Next, use Hypermail to regenerate the archive

    &callHypermail;

# Finally, add either some error control, rm the temporary file, etc
    
    exit(0);

} ## Main

########################
### subroutines
########################

# Reads a smartlist archive and prepares the messages for processing

sub ReadSmartListArchive {
    my $totalfiles;
    my $i;

    opendir (MAILDIR, "$mail_dir") or die "Can't open $mail_dir: $!";
#@fileNames = grep (-T, map "$mail_dir/$_", readdir MAILDIR;
#@fileNames = grep (!/^\.\.?$/, readdir (MAILDIR));
    @fileNames = grep (/^\d*$/, readdir (MAILDIR));

    closedir MAILDIR;

    $totalfiles = $#fileNames;

#
# Sort the file names
#
    @fileNames = sort by_number @fileNames;
    
#
# Normalize the first and last file-names
#
    
    if ($first_msg == $last_msg) {
	$last_msg = 0;
    }

    for ($i = 0; $i<=$totalfiles; $i++) {
	if ($fileNames[$i] eq $first_msg) {
	    $first_msg = $i;
	    last;
	}
    }

    if ($last_msg) {
	for (; $i <=$totalfiles; $i++) {
	    if ($fileNames[$i] eq $last_msg) {
		$last_msg = $i + 1;
		last;
	    }
	}
    } else {
	$last_msg = $first_msg + 1;
    }

} ## ReadSmartListArchive



sub Configure {
    my ($tmp);
    my ($archive_type);
    my ($css_type);
    my ($list_dir);

    # get runtime options
    &getOpts();

    if ($list_name eq "" ||
	$first_msg == -1 ||
	$last_msg == -1 ||
	$period eq "" ||
	$verbose_name eq "" 
	) {
	die ("Usage: update-period -list <list name> -first <first_msg> -last <last_msg> -pdir <period-dirname> -desc <verbose description of the list> [-quiet]\n");
    }

    #############################
    ## Custom configuration    ##
    #############################
    if (-e 'hm_local_config.dat') {
	do 'hm_local_config.dat';
    }

    $list_dir = "$mail_list_path/$list_name";
    $mail_dir = "$list_dir/archive/latest";
    $period_dir = "$list_dir/Archive/$period";
    $catmail = "$list_dir/Archive/tmpcat";    #$opt_j/tmpcat"; 

    # What is the visibility of this mailing list archive?
    # We use the Archive path to infer it
    $tmp = readlink "Archive";
    if (defined $tmp) {
	if ($tmp =~ /Team/i) {
	    $archive_type = "Team";
	} elsif ($tmp =~ /Member/i) {
	    $archive_type = "Member";
	} else {
	    $archive_type = "Public";
	}
    }

    # set up the CSS URLs
    if (defined $archive_type) {
	$css_type = $archive_type;
	$css_type =~ tr/A-Z/a-z/;
    } else {
	$css_type = "public";
    }
    $messagelist_css = "$css_base/$css_type-messagelist";
    $message_css = "$css_base/$css_type-message";
    
} ## Configure


sub callHypermail {
    my ($archive_type);
    my ($command);
    my ($options);

# find the archive type

    #$options = "-r -m $catmail -l \"$verbose_name\" -d $period_dir -a $other_archives -c /home/lists/w3t-test/.hmrc";
    $options = "-c $hm_conf_file -o readone=0 -o ietf_mbox=1 -M -p -m $catmail -l \"$verbose_name\" -d $period_dir -o icss_url=$messagelist_css -o mcss_url=$message_css -o hmail=$archive_name";

    if ($overwrite) {
	$options = "-x " . $options;
    }

    if ($progress) {
	$options = "-p " . $options;
    }

    $command ="$hypermail $options"; 
    system($command);

## comment the following  line if we're debugging
    unlink($catmail);
} ## callHypermail

sub search_japanese_charset {
    my $file = shift;
    my $found;
 
    open (IN, "<$file") or
        die "Could not open $file: $!";
    $found = 0;
    while (<IN>) {
        if (/^Content-Type:.*charset.*ISO-2022-JP/i) {
            # the message has a japanese charset
            $found = 1;
            last;
        }
    }
    close (IN);
    return ($found);
 
} ## search_japanese_charset

sub concatenateMails {
    my $i;
    my $do_filter; #is 1 if we need to nkf filter a message
    my $filename; #the filename we're currently processing

# First, concatenate all mails into a single file 
    
    open (OUT, ">$catmail") or die "Can't open $catmail: $!";
    
    if ($progress) {
	print "Concatenating files ...  $mail_list_path/......";
    }
    for ($i=$first_msg; $i < $last_msg; $i++) {
	if ($progress) {
	    printf "\b\b\b\b\b\b%-6d", $fileNames[$i];
	}
	
#    sleep(1);

	if (!-s "$mail_dir/$fileNames[$i]") {
	    if ($progress) {
		print "skipped non existant file: $mail_dir/$fileNames[$i]\n";
		next;
	    }
	}

	if ($filter_message) {
	    $do_filter = 0;
	    $do_filter = search_japanese_charset ($filename);
	}

        # we convert the file using nkf and then add this new file into
        # hypermail (there should be a nicer, more efficient way to do it)
        if ($filter_message) {
            my $command;
 
            $command ="$nkf -e -J $filename"
                ." >$archive_dir/$latest_mail";
            system ($command);
            $filename = "$archive_dir/$latest_mail";
        }

	open (IN, "$mail_dir/$fileNames[$i]")  
	    or die "Can't open $mail_dir/$fileNames[$i]: $!";
 	# send the From line as is
	$_ = <IN>;
	print OUT $_;
	# write the rest of the messages, IETF-escaped	
	while (<IN>) {
	    print OUT ">", $_;
	}
	close (IN);

        # we remove the temporary nkf translated file
        if ($filter_message && $do_filter) {
            unlink ("$filename");
        }

    }
	
    close (OUT);
	
    if ($progress) {
	print "\r", " " x 70;
	print "\r\uConcatenated ", $last_msg - $first_msg , " files\n";
    }
} ## concatenateMails
    


sub by_number {
        $a <=> $b;
} ## by_number
 

#('s:f:l:t:n:j:');
sub getOpts {
    local($_);
    my $curr;
    my $period;

    while (@ARGV && ($_ = $ARGV[0]) =~ /^-(.)(.*)/) {
        $curr = shift(@ARGV);
	if ($curr eq "-list") {
	    $list_name = shift(@ARGV);
	} elsif ($curr eq "-pdir") {
	    $period = shift(@ARGV);	    
	} elsif ($curr eq "-first") {
	    $first_msg = shift(@ARGV);	    
	} elsif ($curr eq "-last") {
	    $last_msg = shift(@ARGV);	    
	} elsif ($curr eq "-overwrite") {
	    $overwrite = 1;
	} elsif ($curr eq "-quiet") {
	    $progress = 0;
	} elsif ($curr eq "-desc") {
	    $verbose_name = shift(@ARGV);
	    while (defined ($curr = shift(@ARGV))) {
		$verbose_name = $verbose_name . " " . $curr;
	    }
	} elsif ($curr eq "-archive_name") {
	    $archive_name = shift(@ARGV);
	}
	
    }
} ## getOps

